perobs 4.0.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -39,7 +39,10 @@ module PEROBS
39
39
  # mark a node as leaf or branch node.
40
40
  class BTreeNode
41
41
 
42
- attr_reader :node_address, :parent, :is_leaf, :keys, :values, :children
42
+ Stats = Struct.new(:branch_depth, :nodes_count, :leave_nodes, :leaves)
43
+
44
+ attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling,
45
+ :keys, :values, :children
43
46
 
44
47
  # Create a new BTreeNode object for the given tree with the given parent
45
48
  # or recreate the node with the given node_address from the backing store.
@@ -48,47 +51,43 @@ module PEROBS
48
51
  # restore the node.
49
52
  # @param tree [BTree] The tree this node is part of
50
53
  # @param parent [BTreeNode] reference to parent node
54
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
55
+ # @param next_sibling [BTreeNode] reference to next sibling node
51
56
  # @param node_address [Integer] the address of the node to read from the
52
57
  # backing store
53
58
  # @param is_leaf [Boolean] true if the node should be a leaf node, false
54
59
  # if not
55
60
  def initialize(tree, node_address = nil, parent = nil, is_leaf = true,
56
- keys = [], values = [], children = [])
61
+ prev_sibling = nil, next_sibling = nil,
62
+ keys = nil, values = nil, children = nil)
57
63
  @tree = tree
58
64
  if node_address == 0
59
65
  PEROBS.log.fatal "Node address may not be 0"
60
66
  end
61
67
  @node_address = node_address
62
- @parent = parent ? BTreeNodeLink.new(tree, parent) : nil
63
- @keys = keys
68
+ @parent = link(parent)
69
+ @prev_sibling = link(prev_sibling)
70
+ @next_sibling = link(next_sibling)
71
+ @keys = keys || []
64
72
  if (@is_leaf = is_leaf)
65
- @values = values
66
- @children = []
73
+ @values = values || []
74
+ @children = nil
67
75
  else
68
- @children = children
69
- @values = []
76
+ @children = children || []
77
+ @values = nil
70
78
  end
71
-
72
- ObjectSpace.define_finalizer(
73
- self, BTreeNode._finalize(@tree, @node_address, object_id))
74
- @tree.node_cache.insert(self, false)
75
- end
76
-
77
- # This method generates the destructor for the objects of this class. It
78
- # is done this way to prevent the Proc object hanging on to a reference to
79
- # self which would prevent the object from being collected. This internal
80
- # method is not intended for users to call.
81
- def BTreeNode::_finalize(tree, node_address, ruby_object_id)
82
- proc { tree.node_cache._collect(node_address, ruby_object_id) }
83
79
  end
84
80
 
85
- # Create a new SpaceTreeNode. This method should be used for the creation
81
+ # Create a new BTreeNode. This method should be used for the creation
86
82
  # of new nodes instead of calling the constructor directly.
87
83
  # @param tree [BTree] The tree the new node should belong to
88
84
  # @param parent [BTreeNode] The parent node
89
85
  # @param is_leaf [Boolean] True if the node has no children, false
90
86
  # otherwise
91
- def BTreeNode::create(tree, parent = nil, is_leaf = true)
87
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
88
+ # @param next_sibling [BTreeNode] reference to next sibling node
89
+ def BTreeNode::create(tree, parent = nil, is_leaf = true,
90
+ prev_sibling = nil, next_sibling = nil)
92
91
  unless parent.nil? || parent.is_a?(BTreeNode) ||
93
92
  parent.is_a?(BTreeNodeLink)
94
93
  PEROBS.log.fatal "Parent node must be a BTreeNode but is of class " +
@@ -96,17 +95,26 @@ module PEROBS
96
95
  end
97
96
 
98
97
  address = tree.nodes.free_address
99
- node = BTreeNode.new(tree, address, parent, is_leaf)
98
+ node = BTreeNode.new(tree, address, parent, is_leaf, prev_sibling,
99
+ next_sibling)
100
100
  # This is a new node. Make sure the data is written to the file.
101
101
  tree.node_cache.insert(node)
102
102
 
103
- node
103
+ # Insert the newly created node into the existing node chain.
104
+ if (node.prev_sibling = prev_sibling)
105
+ node.prev_sibling.next_sibling = BTreeNodeLink.new(tree, node)
106
+ end
107
+ if (node.next_sibling = next_sibling)
108
+ node.next_sibling.prev_sibling = BTreeNodeLink.new(tree, node)
109
+ end
110
+
111
+ BTreeNodeLink.new(tree, node)
104
112
  end
105
113
 
106
114
  # Restore a node from the backing store at the given address and tree.
107
115
  # @param tree [BTree] The tree the node belongs to
108
- # @param node_address [Integer] The address in the blob file.
109
- def BTreeNode::load(tree, address)
116
+ # @param address [Integer] The address in the blob file.
117
+ def BTreeNode::load(tree, address, unused = nil)
110
118
  unless address.is_a?(Integer)
111
119
  PEROBS.log.fatal "address is not Integer: #{address.class}"
112
120
  end
@@ -122,7 +130,8 @@ module PEROBS
122
130
  ary = bytes.unpack(BTreeNode::node_bytes_format(tree))
123
131
  # Read is_leaf
124
132
  if ary[0] != 0 && ary[0] != 1
125
- PEROBS.log.fatal "First byte of a BTreeNode entry must be 0 or 1"
133
+ PEROBS.log.fatal "First byte of a BTreeNode entry at address " +
134
+ "#{address} must be 0 or 1 but is #{ary[0]}"
126
135
  end
127
136
  is_leaf = ary[0] == 0 ? false : true
128
137
  # This is the number of keys this node has.
@@ -130,19 +139,21 @@ module PEROBS
130
139
  data_count = ary[2]
131
140
  # Read the parent node address
132
141
  parent = ary[3] == 0 ? nil : BTreeNodeLink.new(tree, ary[3])
142
+ prev_sibling = ary[4] == 0 ? nil : BTreeNodeLink.new(tree, ary[4])
143
+ next_sibling = ary[5] == 0 ? nil : BTreeNodeLink.new(tree, ary[5])
133
144
  # Read the keys
134
- keys = ary[4, key_count]
145
+ keys = ary[6, key_count]
135
146
 
136
147
  children = nil
137
148
  values = nil
138
149
  if is_leaf
139
150
  # Read the values
140
- values = ary[4 + tree.order, data_count]
151
+ values = ary[6 + tree.order, data_count]
141
152
  else
142
153
  # Read the child addresses
143
154
  children = []
144
155
  data_count.times do |i|
145
- child_address = ary[4 + tree.order + i]
156
+ child_address = ary[6 + tree.order + i]
146
157
  unless child_address > 0
147
158
  PEROBS.log.fatal "Child address must be larger than 0"
148
159
  end
@@ -150,17 +161,28 @@ module PEROBS
150
161
  end
151
162
  end
152
163
 
153
- node = BTreeNode.new(tree, address, parent, is_leaf, keys, values,
164
+ node = BTreeNode.new(tree, address, parent, is_leaf,
165
+ prev_sibling, next_sibling, keys, values,
154
166
  children)
155
167
  tree.node_cache.insert(node, false)
156
168
 
157
169
  node
158
170
  end
159
171
 
172
+ # This is a wrapper around BTreeNode::load() that returns a BTreeNodeLink
173
+ # instead of the actual node.
174
+ # @param tree [BTree] The tree the node belongs to
175
+ # @param address [Integer] The address in the blob file.
176
+ # @return [BTreeNodeLink] Link to loaded node
177
+ def BTreeNode::load_and_link(tree, address)
178
+ BTreeNodeLink.new(tree, BTreeNode::load(tree, address))
179
+ end
180
+
181
+
160
182
  # @return [String] The format used for String.pack.
161
183
  def BTreeNode::node_bytes_format(tree)
162
184
  # This does not include the 4 bytes for the CRC32 checksum
163
- "CSSQQ#{tree.order}Q#{tree.order + 1}"
185
+ "CSSQQQQ#{tree.order}Q#{tree.order + 1}"
164
186
  end
165
187
 
166
188
  # @return [Integer] The number of bytes needed to store a node.
@@ -169,6 +191,8 @@ module PEROBS
169
191
  2 + # actual key count
170
192
  2 + # actual value or children count (aka data count)
171
193
  8 + # parent address
194
+ 8 + # previous sibling address
195
+ 8 + # next sibling address
172
196
  8 * order + # keys
173
197
  8 * (order + 1) + # values or child addresses
174
198
  4 # CRC32 checksum
@@ -200,11 +224,11 @@ module PEROBS
200
224
 
201
225
  # Once we have reached a leaf node we can insert or replace the value.
202
226
  if node.is_leaf
203
- node.insert_element(key, value)
204
- return
227
+ return node.insert_element(key, value)
205
228
  else
206
229
  # Descend into the right child node to add the value to.
207
230
  node = node.children[node.search_key_index(key)]
231
+ node = node.get_node if node
208
232
  end
209
233
  end
210
234
 
@@ -229,6 +253,62 @@ module PEROBS
229
253
 
230
254
  # Descend into the right child node to continue the search.
231
255
  node = node.children[i]
256
+ node = node.get_node if node
257
+ end
258
+
259
+ PEROBS.log.fatal "Could not find proper node to get from while " +
260
+ "looking for key #{key}"
261
+ end
262
+
263
+ # Return the key/value pair that matches the given key or the next larger
264
+ # key/value pair with a key that is at least as large as key +
265
+ # min_miss_increment.
266
+ # @param key [Integer] key to search for
267
+ # @param min_miss_increment [Integer] minimum required key increment in
268
+ # case an exact key match could not be found
269
+ # @return [Array or nil] matching [key, value] pair or nil if none found
270
+ def get_best_match(key, min_miss_increment)
271
+ node = self
272
+
273
+ while node do
274
+ # Find index of the entry that best fits the key.
275
+ i = node.search_key_index(key)
276
+ if node.is_leaf
277
+ # This is a leaf node. Check if there is an exact match for the
278
+ # given key.
279
+ if node.keys[i] == key
280
+ # Return the corresponding key/value pair.
281
+ return [ key, node.values[i] ]
282
+ else
283
+ # No exact key match. Now search the larger keys for the first
284
+ # that is at least key + min_miss_increment large.
285
+ keys = node.keys
286
+ keys_length = keys.length
287
+ while node
288
+ if i >= keys_length
289
+ # We've reached the end of a node. Continue search in next
290
+ # sibling.
291
+ return nil unless (node = node.next_sibling)
292
+ node = node.get_node
293
+ keys = node.keys
294
+ keys_length = keys.length
295
+ i = -1
296
+ elsif keys[i] >= key + min_miss_increment
297
+ # We've found a key that fits the criteria. Return the
298
+ # corresponding key/value pair.
299
+ return [ keys[i], node.values[i] ]
300
+ end
301
+
302
+ i += 1
303
+ end
304
+
305
+ return nil
306
+ end
307
+ end
308
+
309
+ # Descend into the right child node to continue the search.
310
+ node = node.children[i]
311
+ node = node.get_node if node
232
312
  end
233
313
 
234
314
  PEROBS.log.fatal "Could not find proper node to get from while " +
@@ -257,6 +337,7 @@ module PEROBS
257
337
 
258
338
  # Descend into the right child node to continue the search.
259
339
  node = node.children[i]
340
+ node = node.get_node if node
260
341
  end
261
342
 
262
343
  PEROBS.log.fatal 'Could not find proper node to remove from'
@@ -269,18 +350,20 @@ module PEROBS
269
350
  def split_node
270
351
  unless @parent
271
352
  # The node is the root node. We need to create a parent node first.
272
- self.parent = BTreeNode::create(@tree, nil, false)
353
+ self.parent = link(BTreeNode::create(@tree, nil, false))
354
+ @tree.node_cache.insert(self)
273
355
  @parent.set_child(0, self)
274
356
  @tree.set_root(@parent)
275
357
  end
276
358
 
277
359
  # Create the new sibling that will take the 2nd half of the
278
360
  # node content.
279
- sibling = BTreeNode::create(@tree, @parent, @is_leaf)
361
+ sibling = BTreeNode::create(@tree, @parent, @is_leaf, link(self),
362
+ @next_sibling)
280
363
  # Determine the index of the middle element that gets moved to the
281
364
  # parent. The order must be an uneven number, so adding 1 will get us
282
365
  # the middle element.
283
- mid = @tree.order / 2 + 1
366
+ mid = @tree.order / 2
284
367
  # Insert the middle element key into the parent node
285
368
  @parent.insert_element(@keys[mid], sibling)
286
369
  copy_elements(mid + (@is_leaf ? 0 : 1), sibling)
@@ -289,89 +372,149 @@ module PEROBS
289
372
  @parent
290
373
  end
291
374
 
292
- def merge_node(upper_sibling, parent_index)
293
- if upper_sibling == self
294
- PEROBS.log.fatal "Cannot merge node @#{@node_address} with self"
295
- end
296
- unless upper_sibling.is_leaf
297
- insert_element(@parent.keys[parent_index], upper_sibling.children[0])
298
- end
299
- upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size)
300
- @tree.delete_node(upper_sibling.node_address)
301
-
302
- @parent.remove_element(parent_index)
303
- end
304
-
305
375
  # Insert the given value or child into the current node using the key as
306
376
  # index.
307
377
  # @param key [Integer] key to address the value or child
308
378
  # @param value_or_child [Integer or BTreeNode] value or BTreeNode
309
379
  # reference
380
+ # @return true for insert, false for overwrite
310
381
  def insert_element(key, value_or_child)
311
382
  if @keys.size >= @tree.order
312
383
  PEROBS.log.fatal "Cannot insert into a full BTreeNode"
313
384
  end
314
385
 
315
386
  i = search_key_index(key)
387
+ @tree.node_cache.insert(self)
316
388
  if @keys[i] == key
317
389
  # Overwrite existing entries
318
390
  @keys[i] = key
319
391
  if is_leaf
320
392
  @values[i] = value_or_child
321
393
  else
322
- @children[i + 1] = BTreeNodeLink.new(@tree, value_or_child)
394
+ @children[i + 1] = link(value_or_child)
323
395
  end
396
+
397
+ return false
324
398
  else
325
399
  # Create a new entry
326
400
  @keys.insert(i, key)
327
401
  if is_leaf
328
402
  @values.insert(i, value_or_child)
329
403
  else
330
- @children.insert(i + 1, BTreeNodeLink.new(@tree, value_or_child))
404
+ @children.insert(i + 1, link(value_or_child))
331
405
  end
406
+
407
+ return true
332
408
  end
333
- @tree.node_cache.insert(self)
334
409
  end
335
410
 
336
411
  # Remove the element at the given index.
337
412
  def remove_element(index)
338
- # We need this key to find the link in the parent node.
339
- first_key = @keys[0]
340
- removed_value = nil
341
-
342
413
  # Delete the key at the specified index.
343
- unless @keys.delete_at(index)
344
- PEROBS.log.fatal "Could not remove element #{index} from BTreeNode " +
414
+ unless (key = @keys.delete_at(index))
415
+ PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " +
345
416
  "@#{@node_address}"
346
417
  end
347
- if @is_leaf
348
- # For leaf nodes, also delete the corresponding value.
349
- removed_value = @values.delete_at(index)
418
+ update_branch_key(key) if index == 0
419
+
420
+ # Delete the corresponding value.
421
+ @tree.node_cache.insert(self)
422
+ removed_value = @values.delete_at(index)
423
+
424
+ if @keys.length < min_keys
425
+ if @prev_sibling && @prev_sibling.parent == @parent
426
+ borrow_from_previous_sibling(@prev_sibling) ||
427
+ @prev_sibling.merge_with_leaf_node(self)
428
+ elsif @next_sibling && @next_sibling.parent == @parent
429
+ borrow_from_next_sibling(@next_sibling) ||
430
+ merge_with_leaf_node(@next_sibling)
431
+ elsif @parent
432
+ PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
433
+ end
434
+ end
435
+
436
+ # The merge has potentially invalidated this node. After this method has
437
+ # been called this copy of the node should no longer be used.
438
+ removed_value
439
+ end
440
+
441
+ def remove_child(node)
442
+ unless (index = search_node_index(node))
443
+ PEROBS.log.fatal "Cannot remove child #{node.node_address} " +
444
+ "from node #{@node_address}"
445
+ end
446
+
447
+ @tree.node_cache.insert(self)
448
+ if index == 0
449
+ # Removing the first child is a bit more complicated as the
450
+ # corresponding branch key is in a parent node.
451
+ key = @keys.shift
452
+ update_branch_key(key)
350
453
  else
351
- # The corresponding child has can be found at 1 index higher.
352
- @children.delete_at(index + 1)
454
+ # For all other children we can just remove the corresponding key.
455
+ @keys.delete_at(index - 1)
456
+ end
457
+
458
+ # Remove the child node link.
459
+ child = @children.delete_at(index)
460
+ # Unlink the neighbouring siblings from the child
461
+ child.prev_sibling.next_sibling = child.next_sibling if child.prev_sibling
462
+ child.next_sibling.prev_sibling = child.prev_sibling if child.next_sibling
463
+
464
+ if @keys.length < min_keys
465
+ # The node has become too small. Try borrowing a node from an adjacent
466
+ # sibling or merge with an adjacent node.
467
+ if @prev_sibling && @prev_sibling.parent == @parent
468
+ borrow_from_previous_sibling(@prev_sibling) ||
469
+ @prev_sibling.merge_with_branch_node(self)
470
+ elsif @next_sibling && @next_sibling.parent == @parent
471
+ borrow_from_next_sibling(@next_sibling) ||
472
+ merge_with_branch_node(@next_sibling)
473
+ end
474
+ end
475
+
476
+ # Delete the node from the cache and backing store.
477
+ @tree.delete_node(node.node_address)
478
+ end
479
+
480
+ def merge_with_leaf_node(node)
481
+ if @keys.length + node.keys.length > @tree.order
482
+ PEROBS.log.fatal "Leaf nodes are too big to merge"
353
483
  end
484
+
354
485
  @tree.node_cache.insert(self)
486
+ @keys += node.keys
487
+ @values += node.values
355
488
 
356
- # Find the lower and upper siblings and the index of the key for this
357
- # node in the parent node.
358
- lower_sibling, upper_sibling, parent_index =
359
- find_closest_siblings(first_key)
489
+ node.parent.remove_child(node)
490
+ end
360
491
 
361
- if lower_sibling &&
362
- lower_sibling.keys.size + @keys.size < @tree.order
363
- lower_sibling.merge_node(self, parent_index - 1)
364
- elsif upper_sibling &&
365
- @keys.size + upper_sibling.keys.size < @tree.order
366
- merge_node(upper_sibling, parent_index)
492
+ def merge_with_branch_node(node)
493
+ if @keys.length + 1 + node.keys.length > @tree.order
494
+ PEROBS.log.fatal "Branch nodes are too big to merge"
367
495
  end
368
496
 
369
- # The merge has potentially invalidated this node. After this method has
370
- # been called this copy of the node should no longer be used.
371
- removed_value
497
+ index = @parent.search_node_index(node) - 1
498
+ @tree.node_cache.insert(self)
499
+ @keys << @parent.keys[index]
500
+ @keys += node.keys
501
+ node.children.each { |c| c.parent = link(self) }
502
+ @children += node.children
503
+
504
+ node.parent.remove_child(node)
505
+ end
506
+
507
+ def search_node_index(node)
508
+ index = search_key_index(node.keys.first)
509
+ unless @children[index] == node
510
+ raise RuntimeError, "Child at index #{index} is not the requested node"
511
+ end
512
+
513
+ index
372
514
  end
373
515
 
374
516
  def copy_elements(src_idx, dest_node, dst_idx = 0, count = nil)
517
+ dest_node = dest_node.get_node
375
518
  unless count
376
519
  count = @tree.order - src_idx
377
520
  end
@@ -383,6 +526,7 @@ module PEROBS
383
526
  "#{dest_node.is_leaf} node must be of same kind"
384
527
  end
385
528
 
529
+ @tree.node_cache.insert(dest_node)
386
530
  dest_node.keys[dst_idx, count] = @keys[src_idx, count]
387
531
  if @is_leaf
388
532
  # For leaves we copy the keys and corresponding values.
@@ -395,32 +539,59 @@ module PEROBS
395
539
  dest_node.set_child(dst_idx + i, @children[src_idx + i])
396
540
  end
397
541
  end
398
- @tree.node_cache.insert(dest_node)
399
542
  end
400
543
 
401
544
  def parent=(p)
402
- @parent = p ? BTreeNodeLink.new(@tree, p) : nil
403
545
  @tree.node_cache.insert(self)
546
+ @parent = p
547
+
548
+ p
549
+ end
550
+
551
+ def prev_sibling=(node)
552
+ @tree.node_cache.insert(self)
553
+ @prev_sibling = node
554
+ if node.nil? && @is_leaf
555
+ # If this node is a leaf node without a previous sibling we need to
556
+ # register it as the first leaf node.
557
+ @tree.set_first_leaf(BTreeNodeLink.new(@tree, self))
558
+ end
559
+
560
+ node
561
+ end
562
+
563
+ def next_sibling=(node)
564
+ @tree.node_cache.insert(self)
565
+ @next_sibling = node
566
+ if node.nil? && @is_leaf
567
+ # If this node is a leaf node without a next sibling we need to
568
+ # register it as the last leaf node.
569
+ @tree.set_last_leaf(BTreeNodeLink.new(@tree, self))
570
+ end
571
+
572
+ node
404
573
  end
405
574
 
406
575
  def set_child(index, child)
576
+ @tree.node_cache.insert(self)
407
577
  if child
408
- @children[index] = BTreeNodeLink.new(@tree, child)
409
- @children[index].parent = self
578
+ @children[index] = link(child)
579
+ @children[index].parent = link(self)
410
580
  else
411
581
  @children[index] = nil
412
582
  end
413
- @tree.node_cache.insert(self)
583
+
584
+ child
414
585
  end
415
586
 
416
587
  def trim(idx)
417
- @keys = @keys[0..idx - 1]
588
+ @tree.node_cache.insert(self)
589
+ @keys.slice!(idx, @keys.length - idx)
418
590
  if @is_leaf
419
- @values = @values[0..idx - 1]
591
+ @values.slice!(idx, @values.length - idx)
420
592
  else
421
- @children = @children[0..idx]
593
+ @children.slice!(idx + 1, @children.length - idx - 1)
422
594
  end
423
- @tree.node_cache.insert(self)
424
595
  end
425
596
 
426
597
  # Search the keys of the node that fits the given key. The result is
@@ -429,36 +600,8 @@ module PEROBS
429
600
  # @param key [Integer] key to search for
430
601
  # @return [Integer] Index of the matching key or the insert position.
431
602
  def search_key_index(key)
432
- # Handle special case for empty keys list.
433
- return 0 if @keys.empty?
434
-
435
- # Keys are unique and always sorted. Use a binary search to find the
436
- # index that fits the given key.
437
- li = pi = 0
438
- ui = @keys.size - 1
439
- while li <= ui
440
- # The pivot element is always in the middle between the lower and upper
441
- # index.
442
- pi = li + (ui - li) / 2
443
-
444
- if key < @keys[pi]
445
- # The pivot element is smaller than the key. Set the upper index to
446
- # the pivot index.
447
- ui = pi - 1
448
- elsif key > @keys[pi]
449
- # The pivot element is larger than the key. Set the lower index to
450
- # the pivot index.
451
- li = pi + 1
452
- else
453
- # We've found an exact match. For leaf nodes return the found index.
454
- # For branch nodes we have to add one to the index since the larger
455
- # child is the right one.
456
- return @is_leaf ? pi : pi + 1
457
- end
458
- end
459
- # No exact match was found. For the insert operaton we need to return
460
- # the index of the first key that is larger than the given key.
461
- @keys[pi] < key ? pi + 1 : pi
603
+ (@is_leaf ? @keys.bsearch_index { |x| x >= key } :
604
+ @keys.bsearch_index { |x| x > key }) || @keys.length
462
605
  end
463
606
 
464
607
  # Iterate over all the key/value pairs in this node and all sub-nodes.
@@ -509,17 +652,33 @@ module PEROBS
509
652
  # Check consistency of the node and all subsequent nodes. In case an error
510
653
  # is found, a message is logged and false is returned.
511
654
  # @yield [key, value]
512
- # @return [Boolean] true if tree has no errors
513
- def check
655
+ # @return [nil or Stats] nil in case of errors or a Stats struct with some
656
+ # statistical information about the tree
657
+ def check(&block)
658
+ stats = Stats.new(nil, 0, 0, 0)
659
+
514
660
  traverse do |node, position, stack|
515
661
  if position == 0
516
- if node.parent && node.keys.size < 1
517
- node.error "BTreeNode must have at least one entry"
518
- return false
662
+ stats.nodes_count += 1
663
+ if node.parent
664
+ unless node.parent.is_a?(BTreeNodeLink)
665
+ node.error "parent is a #{node.parent.class} instead of a " +
666
+ "BTreeNodeLink"
667
+ return nil
668
+ end
669
+ # After a split the nodes will only have half the maximum keys.
670
+ # For branch nodes one of the split nodes will have even 1 key
671
+ # less as this will become the branch key in a parent node.
672
+ if node.keys.size < min_keys - (node.is_leaf ? 0 : 1)
673
+ node.error "BTreeNode #{node.node_address} has too few keys"
674
+ return nil
675
+ end
519
676
  end
677
+
520
678
  if node.keys.size > @tree.order
521
679
  node.error "BTreeNode must not have more then #{@tree.order} " +
522
680
  "keys, but has #{node.keys.size} keys"
681
+ return nil
523
682
  end
524
683
 
525
684
  last_key = nil
@@ -527,45 +686,101 @@ module PEROBS
527
686
  if last_key && key < last_key
528
687
  node.error "Keys are not increasing monotoneously: " +
529
688
  "#{node.keys.inspect}"
530
- return false
689
+ return nil
531
690
  end
691
+ last_key = key
532
692
  end
533
693
 
534
694
  if node.is_leaf
695
+ if stats.branch_depth
696
+ unless stats.branch_depth == node.tree_level
697
+ node.error "All leaf nodes must have same distance from root "
698
+ return nil
699
+ end
700
+ else
701
+ stats.branch_depth = node.tree_level
702
+ end
703
+ if node.prev_sibling && !node.prev_sibling.is_a?(BTreeNodeLink)
704
+ node.error "prev_sibling is a #{node.prev_sibling.class} " +
705
+ "instead of a BTreeNodeLink"
706
+ return nil
707
+ end
708
+ if node.next_sibling && !node.next_sibling.is_a?(BTreeNodeLink)
709
+ node.error "next_sibling is a #{node.next_sibling.class} " +
710
+ "instead of a BTreeNodeLink"
711
+ return nil
712
+ end
713
+ if node.prev_sibling.nil? && @tree.first_leaf != node
714
+ node.error "Leaf node #{node.node_address} has no previous " +
715
+ "sibling but is not the first leaf of the tree"
716
+ return nil
717
+ end
718
+ if node.next_sibling.nil? && @tree.last_leaf != node
719
+ node.error "Leaf node #{node.node_address} has no next sibling " +
720
+ "but is not the last leaf of the tree"
721
+ return nil
722
+ end
535
723
  unless node.keys.size == node.values.size
536
724
  node.error "Key count (#{node.keys.size}) and value " +
537
725
  "count (#{node.values.size}) don't match"
538
- return false
726
+ return nil
539
727
  end
728
+ unless node.children.nil?
729
+ node.error "@children must be nil for a leaf node"
730
+ return nil
731
+ end
732
+
733
+ stats.leave_nodes += 1
734
+ stats.leaves += node.keys.length
540
735
  else
541
- unless node.keys.size == node.children.size - 1
736
+ unless node.values.nil?
737
+ node.error "@values must be nil for a branch node"
738
+ return nil
739
+ end
740
+ unless node.children.size == node.keys.size + 1
542
741
  node.error "Key count (#{node.keys.size}) must be one " +
543
742
  "less than children count (#{node.children.size})"
544
- return false
743
+ return nil
545
744
  end
546
745
  node.children.each_with_index do |child, i|
547
746
  unless child.is_a?(BTreeNodeLink)
548
747
  node.error "Child #{i} is of class #{child.class} " +
549
748
  "instead of BTreeNodeLink"
550
- return false
749
+ return nil
551
750
  end
552
751
  unless child.parent.is_a?(BTreeNodeLink)
553
752
  node.error "Parent reference of child #{i} is of class " +
554
- "#{child.class} instead of BTreeNodeLink"
555
- return false
753
+ "#{child.parent.class} instead of BTreeNodeLink"
754
+ return nil
556
755
  end
557
- if child.node_address == node.node_address
756
+ if child == node
558
757
  node.error "Child #{i} points to self"
559
- return false
758
+ return nil
560
759
  end
561
760
  if stack.include?(child)
562
761
  node.error "Child #{i} points to ancester node"
563
- return false
762
+ return nil
564
763
  end
565
764
  unless child.parent == node
566
765
  node.error "Child #{i} does not have parent pointing " +
567
766
  "to this node"
568
- return false
767
+ return nil
768
+ end
769
+ if i > 0
770
+ unless node.children[i - 1].next_sibling == child
771
+ node.error "next_sibling of node " +
772
+ "#{node.children[i - 1].node_address} " +
773
+ "must point to node #{child.node_address}"
774
+ return nil
775
+ end
776
+ end
777
+ if i < node.children.length - 1
778
+ unless child == node.children[i + 1].prev_sibling
779
+ node.error "prev_sibling of node " +
780
+ "#{node.children[i + 1].node_address} " +
781
+ "must point to node #{child.node_address}"
782
+ return nil
783
+ end
569
784
  end
570
785
  end
571
786
  end
@@ -578,25 +793,26 @@ module PEROBS
578
793
  node.error "Child #{node.children[index].node_address} " +
579
794
  "has too large key #{node.children[index].keys.last}. " +
580
795
  "Must be smaller than #{node.keys[index]}."
581
- return false
796
+ return nil
582
797
  end
583
- unless node.children[position].keys.first >=
584
- node.keys[index]
798
+ unless node.children[position].keys.first >= node.keys[index]
585
799
  node.error "Child #{node.children[position].node_address} " +
586
800
  "has too small key #{node.children[position].keys.first}. " +
587
801
  "Must be larger than or equal to #{node.keys[index]}."
588
- return false
802
+ return nil
589
803
  end
590
804
  else
591
805
  if block_given?
592
806
  # If a block was given, call this block with the key and value.
593
- return false unless yield(node.keys[index], node.values[index])
807
+ unless yield(node.keys[index], node.values[index])
808
+ return nil
809
+ end
594
810
  end
595
811
  end
596
812
  end
597
813
  end
598
814
 
599
- true
815
+ stats
600
816
  end
601
817
 
602
818
  def is_top?
@@ -652,6 +868,7 @@ module PEROBS
652
868
 
653
869
  str = (is_last_child ? ' ' : ' |') + str
654
870
  node = node.parent
871
+ node = node.get_node if node
655
872
  end
656
873
 
657
874
  str
@@ -671,13 +888,37 @@ module PEROBS
671
888
  s += ' ^@'
672
889
  end
673
890
  end
891
+ if @prev_sibling
892
+ begin
893
+ s += " <#{@prev_sibling.node_address}"
894
+ rescue
895
+ s += ' <@'
896
+ end
897
+ end
898
+ if @next_sibling
899
+ begin
900
+ s += " >#{@next_sibling.node_address}"
901
+ rescue
902
+ s += ' >@'
903
+ end
904
+ end
674
905
 
675
906
  s
676
907
  end
677
908
 
909
# Determine the level of this node by following the parent links until a
# node without a parent is reached.
# @return [Integer] 1 for a node that has no parent, incremented by one for
#         every ancestor found on the way up.
def tree_level
  level = 1
  node = self
  # The assignment doubles as the loop condition: the walk stops as soon as
  # a node reports no parent.
  while (node = node.parent)
    level += 1
  end

  level
end
918
+
919
+
678
920
  def error(msg)
679
- PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}\n" +
680
- @tree.to_s
921
+ PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}"
681
922
  end
682
923
 
683
924
  def write_node
@@ -685,7 +926,9 @@ module PEROBS
685
926
  @is_leaf ? 1 : 0,
686
927
  @keys.size,
687
928
  @is_leaf ? @values.size : @children.size,
688
- @parent ? @parent.node_address : 0
929
+ @parent ? @parent.node_address : 0,
930
+ @prev_sibling ? @prev_sibling.node_address : 0,
931
+ @next_sibling ? @next_sibling.node_address : 0
689
932
  ] + @keys + ::Array.new(@tree.order - @keys.size, 0)
690
933
 
691
934
  if @is_leaf
@@ -708,24 +951,107 @@ module PEROBS
708
951
 
709
952
  private
710
953
 
711
- def find_closest_siblings(key)
712
- # The root node has no siblings.
713
- return [ nil, nil, nil ] unless @parent
954
# Threshold used by the borrow helpers to decide whether a sibling can give
# up a key: half of the tree order.
# @return [Integer] @tree.order / 2 (rounds down, assuming the order is an
#         Integer -- TODO confirm against the tree class)
def min_keys
  @tree.order / 2
end
957
+
958
# Normalize a node reference into a BTreeNodeLink.
# @param node [BTreeNodeLink, BTreeNode, Integer, nil] the reference to
#        wrap; an Integer is presumably a node address (verify against
#        BTreeNodeLink)
# @return [BTreeNodeLink, nil] nil for nil input, the very same object if
#         it already is a BTreeNodeLink, otherwise a new link bound to
#         @tree
def link(node)
  return nil if node.nil?
  return node if node.is_a?(BTreeNodeLink)

  if node.is_a?(BTreeNode) || node.is_a?(Integer)
    return BTreeNodeLink.new(@tree, node)
  end

  # Anything else is a programming error and is reported as fatal.
  PEROBS.log.fatal "Node link must be a BTreeNode, not a #{node.class}"
end
969
+
970
# Try to borrow an element from the preceding sibling.
# @param prev_node [BTreeNode or BTreeNodeLink] the preceding sibling.
#        NOTE(review): the index computation below uses @parent, so
#        prev_node presumably has to share the parent with this node --
#        confirm against the callers.
# @return [Boolean] true if an element was borrowed, false otherwise.
def borrow_from_previous_sibling(prev_node)
  # Only borrow if the sibling still has more than min_keys keys after
  # giving one away.
  return false unless prev_node.keys.length - 1 > min_keys

  # Index of the branch key in the parent that belongs to this node.
  index = @parent.search_node_index(self) - 1

  # All three nodes are about to be modified; register them with the cache.
  @tree.node_cache.insert(self)
  @tree.node_cache.insert(prev_node.get_node)
  @tree.node_cache.insert(@parent.get_node)
  if @is_leaf
    # Move the last key of the previous node to the front of this node
    @keys.unshift(prev_node.keys.pop)
    # Register the new lead key of this node with its parent
    @parent.keys[index] = @keys.first
    # Move the last value of the previous node to the front of this node
    @values.unshift(prev_node.values.pop)
  else
    # For branch nodes the branch key will be the borrowed key.
    @keys.unshift(@parent.keys[index])
    # And the last key of the previous node will become the new branch
    # key for this node.
    @parent.keys[index] = prev_node.keys.pop
    # Move the last child of the previous node to the front of this node
    moved_child = prev_node.children.pop
    @children.unshift(moved_child)
    moved_child.parent = link(self)
  end

  true
end
1003
+
1004
# Try to borrow an element from the next sibling.
# @param next_node [BTreeNode or BTreeNodeLink] the following sibling
# @return [Boolean] true if an element was borrowed, false otherwise.
def borrow_from_next_sibling(next_node)
  # Only borrow if the sibling still has more than min_keys keys after
  # giving one away.
  return false unless next_node.keys.length - 1 > min_keys

  next_parent = next_node.parent
  # The next sibling now has a new lead key that requires the branch key
  # to be updated in the parent node.
  index = next_parent.search_node_index(next_node) - 1

  # All three nodes are about to be modified; register them with the cache.
  @tree.node_cache.insert(self)
  @tree.node_cache.insert(next_node.get_node)
  @tree.node_cache.insert(next_parent.get_node)
  if @is_leaf
    # Move the first key of the next node to the end of this node
    @keys << next_node.keys.shift
    # Register the new lead key of next_node with its parent
    next_parent.keys[index] = next_node.keys.first
    # Move the first value of the next node to the end of this node
    @values << next_node.values.shift
  else
    # For branch nodes we need to get the lead key from the parent of
    # next_node.
    @keys << next_parent.keys[index]
    # The old lead key of next_node becomes the branch key in the parent
    # of next_node. And the keys of next_node are shifted.
    next_parent.keys[index] = next_node.keys.shift
    # Move the first child of the next node to the end of this node
    moved_child = next_node.children.shift
    @children << moved_child
    moved_child.parent = link(self)
  end

  true
end
1040
+
1041
# Replace the branch key that refers to the former lead key of this node
# with the current lead key. The ancestors are searched from the direct
# parent upwards; only the first node that contains old_key is changed.
# @param old_key [Object] the key that used to be the first key of this
#        node
# @return [nil]
def update_branch_key(old_key)
  new_key = @keys.first
  node = @parent

  while node
    if (index = node.keys.index(old_key))
      node.keys[index] = new_key
      # The ancestor was modified, so register it with the node cache.
      @tree.node_cache.insert(node.get_node)
      return
    end
    node = node.parent
  end

  # No ancestor holds old_key: the smallest element has no branch key.
  nil
end
730
1056
 
731
1057
  end