perobs 4.0.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,1002 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArrayNode.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/Array'
31
+
32
+ module PEROBS
33
+
34
+ # The BigArrayNode class provides the BTree nodes for the BigArray objects.
35
+ # A node can either be a branch node or a leaf node. Branch nodes don't
36
+ # store values, only offsets and references to child nodes. Leaf nodes don't
37
+ # have child nodes but store the actual values. The leaf nodes always
38
+ # contain at least node_size / 2 number of consecutive values. The index of
39
+ # the first value in the BigArray is the sum of the offsets stored in the
40
+ # parent nodes. Branch nodes store the offsets and the corresponding
41
+ # child node references. The first offset is always 0. Consecutive offsets
42
+ # are set to the previous offset plus the total number of values stored in
43
+ # the previous child node. The leaf nodes don't contain holes. A
44
+ # concatenation of all leaf node values represents the stored Array.
45
+ #
46
+ # Root Node +--------------------------------+
47
+ # Offsets | 0 11 |
48
+ # Children | |
49
+ # v v
50
+ # Level 1 +--------------------------++--------------------------+
51
+ # Offsets | 0 4 7 || 0 2 5 |
52
+ # Children | | | | | |
53
+ # v v v v v v
54
+ # Leaves +---------++-------++----------++-------++----------++-------+
55
+ # Values | A B C D || E F G || H I J K || L M || N O P || Q R |
56
+ #
57
+ # Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
58
+ #
59
+ class BigArrayNode < PEROBS::Object
60
+
61
+ attr_persist :tree, :parent, :offsets, :values, :children,
62
+ :prev_sibling, :next_sibling
63
+
64
+ # Internal constructor. Use Store.new(BigArrayNode, ...) instead.
65
+ # @param p [Handle]
66
+ # @param tree [BigArray] The tree this node should belong to
67
+ # @param is_leaf [Boolean] True if a leaf node should be created, false
68
+ # for a branch node.
69
+ # @param parent [BigArrayNode] Parent node
70
+ # @param prev_sibling [BigArrayNode] Previous sibling
71
+ # @param next_sibling [BigArrayNode] Next sibling
72
def initialize(p, tree, is_leaf, parent = nil,
               prev_sibling = nil, next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent

  unless is_leaf
    # Branch node: it keeps offsets and child references but never values,
    # and it does not take part in the leaf sibling chain.
    self.offsets = @store.new(PEROBS::Array)
    self.children = @store.new(PEROBS::Array)
    self.values = nil
    self.prev_sibling = self.next_sibling = nil
    return
  end

  # Leaf node: it keeps values only.
  self.values = @store.new(PEROBS::Array)
  self.children = self.offsets = nil

  # Hook the new leaf into the sibling chain. A missing neighbor on either
  # side means this leaf becomes the first respectively last leaf of the
  # tree, which must be registered with the BigArray object.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  else
    @tree.first_leaf = myself
  end

  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  else
    @tree.last_leaf = myself
  end
end
106
+
107
+ # @return [Boolean] True if this is a leaf node, false otherwise.
108
def is_leaf?
  # Only branch nodes own a children list; leaf nodes keep it nil.
  @children.nil?
end
111
+
112
# @return [Integer] number of values (leaf node) or number of children
#   (branch node) held directly by this node
def size
  entries = is_leaf? ? @values : @children
  entries.size
end
115
+
116
+ # @return [Integer] the number of values stored in this node.
117
def values_count
  total = 0
  current = self

  # Follow the right-most path: the last offset of a branch node is the
  # number of values held by all children but the last one.
  loop do
    return nil unless current
    return total + current.values.size if current.is_leaf?

    total += current.offsets.last
    current = current.children.last
  end
end
129
+
130
+
131
+ # Set the given value at the given index.
132
+ # @param index [Integer] Position of the value to overwrite
133
+ # @param value [Object] value to store
134
def set(index, value)
  node = self

  # Walk down from this node. At every branch level the index is made
  # relative to the child we descend into.
  while node
    if node.is_leaf?
      # Only existing slots may be overwritten.
      if index >= node.values.size
        node.fatal "Set index (#{index}) larger than values array " +
                   "(#{node.values.size})."
      end
      node.values[index] = value
      return
    end

    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # The loop only ends when a branch node had no child at the computed
  # position, so 'node' is nil here. Report the problem through this node
  # instead of calling a method on nil.
  fatal "Could not find proper node to set the value while " +
        "looking for index #{index}"
end
160
+
161
+ # Insert the given value at the given index. All following values will be
162
+ # pushed to a higher index.
163
+ # @param index [Integer] Position to insert at
164
+ # @param value [Integer] value to insert
165
def insert(index, value)
  node = self
  cidx = nil

  while node
    # Every full node met on the way down is split into two half-full nodes
    # so that the final leaf insert can never overflow.
    if node.size >= @tree.node_size
      # split_node returns the parent, so the index must be made relative
      # to the parent again before we descend into one of the two halves.
      index += node.parent.offsets[cidx] if node.parent
      node = node.split_node
    end

    if node.is_leaf?
      node.values.insert(index, value)
      # All offsets behind this leaf (on every ancestor level) move by one.
      node.parent.adjust_offsets(node, 1) if node.parent
      return
    end

    # Descend into the child that covers the index.
    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # 'node' is nil when we get here, so the error must be reported through
  # this node rather than through the (missing) child.
  fatal "Could not find proper node to insert the value while " +
        "looking for index #{index}"
end
198
+
199
+ # Return the value stored at the given index or return nil if the index is
200
+ # unknown.
201
+ # @param index [Integer] Position of the value to read
202
+ # @return [Object or nil] value stored at the index
203
def get(index)
  current = self

  while current
    # A leaf holds the value at the (by now node-relative) index.
    return current.values[index] if current.is_leaf?

    # Pick the last child whose offset is not larger than the index and
    # make the index relative to that child.
    offsets = current.offsets
    upper = offsets.bsearch_index { |o| o > index }
    child_idx = (upper || offsets.length) - 1
    index -= offsets[child_idx]
    current.fatal "Index (#{index}) became negative" if index < 0
    current = current.children[child_idx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for index #{index}"
end
225
+
226
+ # Delete the element at the specified index, returning that element, or
227
+ # nil if the index is out of range.
228
+ # @param index [Integer] Index in the BigArray
229
+ # @return [Object] found value or nil
230
def delete_at(index)
  node = self

  while node
    if node.is_leaf?
      # Nothing is stored behind the last value. Bail out before touching
      # the ancestor offsets, otherwise they would be decremented although
      # no value was removed.
      return nil if index >= node.values.size

      deleted_value = node.values.delete_at(index)
      if node.parent
        # All offsets behind this leaf (on every ancestor level) shrink.
        node.parent.adjust_offsets(node, -1)
        # Refill or merge the leaf if it dropped below the minimum size.
        if node.size < min_size
          node.parent.consolidate_child_nodes(node)
        end
      end

      return deleted_value
    end

    # Descend into the child that covers the index.
    cidx = (node.offsets.bsearch_index { |o| o > index } ||
            node.offsets.length) - 1
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to delete from while " +
                   "looking for index #{index}"
end
259
+
260
+ # Iterate over all the values of the node.
261
+ # @yield [value]
262
def each
  # Branch nodes hold no values, so there is nothing to iterate over.
  if is_leaf?
    @values.each { |value| yield(value) }
  end
end
269
+
270
+ # Iterate over all the values of the node in reverse order.
271
+ # @yield [value]
272
def reverse_each
  # Branch nodes hold no values, so there is nothing to iterate over.
  if is_leaf?
    @values.reverse_each { |value| yield(value) }
  end
end
279
+
280
+ # Check consistency of the node and all subsequent nodes. In case an error
281
+ # is found, a message is logged and false is returned.
282
+ # @yield [key, value]
283
+ # @return [Boolean] true if tree has no errors
284
def check
  # Depth of the first leaf we see; all other leaves must match it.
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # Nodes should have between min_size() and @tree.node_size children
      # or values. Only the root node may have less.
      if node.size > @tree.node_size
        node.error "BigArray node #{node._id} is too large. It has " +
                   "#{node.size} nodes instead of max. #{@tree.node_size}."
        return false
      end
      if node.parent && node.size < min_size
        node.error "BigArray node #{node._id} is too small"
        return false
      end

      if node.is_leaf?
        # All leaf nodes must have same distance from root node.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end

        return false unless node.check_leaf_node_links

        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        unless node.children.size == node.offsets.size
          node.error "Offset count (#{node.offsets.size}) must be equal " +
                     "to children count (#{node.children.size})"
          return false
        end

        if node.values
          node.error "values must be nil for a branch node"
          return false
        end

        # The sibling links of the node being visited must be inspected,
        # not those of the node the check was started on, and a violation
        # must fail the check like every other error.
        unless node.prev_sibling.nil? && node.next_sibling.nil?
          node.error "prev_sibling and next_sibling must be nil for " +
                     "branch nodes"
          return false
        end

        return false unless node.check_offsets

        return false unless node.check_child_nodes(stack)
      end
    elsif position <= node.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf? && block_given?
        # Let the caller validate every stored value.
        return false unless yield(node.first_index + index,
                                  node.values[index])
      end
    end
  end

  true
end
356
+
357
# Verify the sibling links of this leaf node: a missing neighbor requires
# this node to be registered as first/last leaf of the tree, an existing
# neighbor must point back to this node.
# @return [Boolean] true if the links are consistent, false otherwise
def check_leaf_node_links
  if @prev_sibling.nil? && @tree.first_leaf != self
    error "Leaf node #{@_id} has no previous sibling " +
          "but is not the first leaf of the tree"
    return false
  end
  if !@prev_sibling.nil? && @prev_sibling.next_sibling != self
    error "next_sibling of previous sibling does not point to " +
          "this node"
    return false
  end

  if @next_sibling.nil? && @tree.last_leaf != self
    error "Leaf node #{@_id} has no next sibling " +
          "but is not the last leaf of the tree"
    return false
  end
  if !@next_sibling.nil? && @next_sibling.prev_sibling != self
    error "previous_sibling of next sibling does not point to " +
          "this node"
    return false
  end

  true
end
384
+
385
# Verify the offset list of this branch node: it must start with 0 and
# every further offset must be the previous offset plus the number of
# values stored below the previous child.
# @return [Boolean] true if the offsets are consistent, false otherwise
def check_offsets
  # A root node without any entries has nothing to verify.
  return true if @parent.nil? && @offsets.empty?

  unless @offsets[0] == 0
    error "First offset is not 0: #{@offsets.inspect}"
    return false
  end

  1.upto(@offsets.size - 1) do |i|
    previous = @offsets[i - 1]
    current = @offsets[i]

    if current < previous
      error "Offsets are not strictly monotoneously " +
            "increasing: #{@offsets.inspect}"
      return false
    end

    expected_offset = previous + @children[i - 1].values_count
    if current != expected_offset
      error "Offset #{i} must be #{expected_offset} " +
            "but is #{current}."
      return false
    end
  end

  true
end
414
+
415
# Verify the child references of this branch node.
# @param stack [Array] traversal stack as provided by traverse(); its
#   entries are [ node, position ] pairs describing the path to this node
# @return [Boolean] true if all children are sane, false otherwise
def check_child_nodes(stack)
  if @children.uniq.size != @children.size
    error "Node #{@_id} has multiple identical children"
    return false
  end

  @children.each_with_index do |child, i|
    unless child.is_a?(BigArrayNode)
      error "Child #{@_id} is of class #{child.class} " +
            "instead of BigArrayNode"
      return false
    end

    unless child.parent.is_a?(BigArrayNode)
      # Report the class of the offending parent reference, not the class
      # of the child itself.
      error "Parent reference of child #{i} is of class " +
            "#{child.parent.class} instead of BigArrayNode"
      return false
    end

    if child.parent != self
      error "Child node #{child._id} has wrong parent " +
            "#{child.parent._id}. It should be #{@_id}."
      return false
    end

    if child == self
      error "Child #{i} point to self"
      return false
    end

    # The stack entries are [ node, position ] pairs, so the node part has
    # to be compared. A plain include?(child) can never match a pair.
    if stack.any? { |ancestor, _position| ancestor == child }
      error "Child #{i} points to ancester node"
      return false
    end
  end

  true
end
459
+
460
+ # @return [String] Human readable form of the sub-tree.
461
def to_s
  str = ''

  traverse do |node, position, _stack|
    begin
      if position == 0
        # Header line of the node: tree decoration followed by the summary.
        lead = node.parent ? node.parent.tree_prefix + ' +' : 'o'
        mark = node.tree_branch_mark
        stem = node.size == 0 ? '--' : 'v-'
        str += "#{lead}#{mark}-#{stem}#{node.tree_summary}\n"
      elsif node.is_leaf? && position <= node.size
        # One line per stored value, labelled with its array index.
        slot = position - 1
        prefix = node.tree_prefix
        corner = position == node.size ? '-' : '|'
        array_index = node.value_index(slot)
        value = node.values[slot]
        shown = value.nil? ? 'nil' : value
        str += "#{prefix} #{corner} [ #{array_index}: #{shown} ]\n"
      end
    rescue => e
      # Keep dumping the rest of the tree even if a node is broken.
      str += "@@@@@@@@@@: #{e.message}\n"
    end
  end

  str
end
492
+
493
+ # Split the current node into two nodes. The upper half of the elements
494
+ # will be moved into a newly created node. This node will retain the lower
495
+ # half.
496
+ # @return [BigArrayNode] common parent of the two nodes
497
def split_node
  unless @parent
    # This is the root node, so a new root has to be created above it
    # before the split can be registered anywhere.
    self.parent = @store.new(BigArrayNode, @tree, false)
    @parent.offsets[0] = 0
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The new sibling is placed right behind this node and receives the
  # upper half of the content.
  sibling = @store.new(BigArrayNode, @tree, is_leaf?, @parent, myself,
                       @next_sibling)
  mid = size / 2
  upper_half = mid..-1

  if is_leaf?
    # Example: parent offsets [0 4 7] above the leaves
    #   [A B C D] [E F G] [H I J K]
    # become, after splitting the first leaf, offsets [0 2 4 7] above
    #   [A B] [C D] [E F G] [H I J K]
    #
    # The sibling starts 'mid' values behind this node.
    @parent.insert_child_after_peer(mid, sibling, self)
    # Hand the upper half of the values over to the sibling ...
    sibling.values += @values[upper_half]
    # ... and drop them from this node.
    @values.slice!(upper_half)
  else
    # Example: root offsets [0 11] above a branch node with offsets
    # [0 4 7 10] become root offsets [0 7 11] above two branch nodes with
    # the offsets [0 4] and [0 3].
    #
    # The offset of the first moved child is the distance between this
    # node and the new sibling.
    offset_delta = @offsets[mid]
    @parent.insert_child_after_peer(offset_delta, sibling, self)
    # The moved offsets become relative to the sibling.
    sibling.offsets += @offsets[upper_half].map { |v| v - offset_delta }
    @offsets.slice!(upper_half)
    # Move the matching child references and re-parent them.
    sibling.children += @children[upper_half]
    sibling.children.each { |c| c.parent = sibling }
    @children.slice!(upper_half)
  end

  @parent
end
577
+
578
# Register a new child node directly behind the given peer node.
# @param offset [Integer] distance of the new node from the peer's offset
# @param node [BigArrayNode] child node to insert
# @param peer [BigArrayNode] existing child the new node is placed behind.
#   Without a (known) peer the node is inserted as first child.
def insert_child_after_peer(offset, node, peer = nil)
  peer_index = @children.find_index(peer)
  cidx = peer_index ? peer_index + 1 : 0
  # Without a peer there is no base offset to look up; indexing the offsets
  # with nil would raise. NOTE(review): a base of 0 is assumed for that
  # case -- confirm against callers that rely on the default peer.
  base = peer_index ? @offsets[peer_index] : 0
  @offsets.insert(cidx, base + offset)
  @children.insert(cidx, node)
end
584
+
585
# Bring an underfull child node back to a legal size, either by merging it
# with a neighbor or by borrowing one element from a neighbor.
# @param child [BigArrayNode] the underfull child of this node
def consolidate_child_nodes(child)
  unless (child_index = @children.index(child))
    error "Cannot find child to consolidate"
    # Without a position there is nothing we can consolidate; continuing
    # would only fail on the nil index.
    return
  end

  if child_index == 0
    # The first child has no predecessor. Consolidate with the successor
    # if it exists.
    return unless (succ = @children[child_index + 1])

    if child.size + succ.size <= @tree.node_size
      merge_child_with_next(child_index)
    else
      move_first_element_of_successor_to_child(child_index)
    end
  else
    # All other children are consolidated with their predecessor.
    pred = @children[child_index - 1]

    if pred.size + child.size <= @tree.node_size
      merge_child_with_next(child_index - 1)
    else
      move_last_element_of_predecessor_to_child(child_index)
    end
  end
end
612
+
613
+ # @param offset [Integer] offset to search the child index for
614
+ # @return [Integer] Index of the matching offset or the insert position.
615
def search_child_index(offset)
  if @offsets.empty? || offset <= @offsets.first
    # Empty offset list or an offset at/before the first entry: the first
    # slot is the only candidate.
    0
  else
    # Position of the last entry that is not larger than the offset.
    upper = @offsets.bsearch_index { |o| o > offset }
    upper.nil? ? @offsets.length - 1 : upper - 1
  end
end
621
+
622
+ # @return The index of the current node in the children list of the parent
623
+ # node. If the node is the root node, nil is returned.
624
def index_in_parent_node
  # The root node has no parent and therefore no position.
  @parent ? @parent.children.find_index(self) : nil
end
629
+
630
# @return [Integer] index in the BigArray of the first value stored in or
#   below this node
def first_index
  # TODO: This is a very expensive method since it searches the children
  # list of every ancestor. Find a way to make this way faster.
  index = 0
  child = myself
  node = parent

  while node
    # The offset a parent stores for a child is the position of the
    # child's first value relative to the parent. The absolute index is
    # the sum of these offsets over all ancestors, not just the offset
    # found on a single level.
    position = node.children.index(child)
    index += node.offsets[position] if position
    child = node
    node = node.parent
  end

  index
end
645
+
646
+ # Compute the array index of the value with the given index in the current
647
+ # node.
648
+ # @param idx [Integer] Index of the value in the current node
649
+ # @return [Integer] Array index of the value
650
def value_index(idx)
  current = self

  # Add the offset every ancestor stores for the node on our path.
  while (ancestor = current.parent)
    idx += ancestor.offsets[current.index_in_parent_node]
    current = ancestor
  end

  idx
end
659
+
660
+ # This method takes care of adjusting the offsets in tree in case elements
661
+ # were inserted or removed. All nodes that hold children after the
662
+ # insert/remove operation need to be adjusted. Since child nodes get their
663
+ # offsets via their parents, only the parent node and the direct ancestor
664
+ # followers need to be adjusted.
665
+ # @param after_child [BigArrayNode] specifies the modified leaf node
666
+ # @param delta [Integer] specifies how many elements were inserted or
667
+ # removed.
668
def adjust_offsets(after_child, delta)
  current = self

  while current
    # Skip all entries up to and including the modified child, then shift
    # the offsets of every entry behind it.
    found = false
    current.children.size.times do |i|
      if found
        current.offsets[i] += delta
      elsif current.children[i] == after_child
        found = true
      end
    end

    current.fatal "Could not find child #{after_child._id}" unless found

    # One level up, the node just processed is the modified child.
    after_child = current
    current = current.parent
  end
end
692
+
693
+ # This is a generic tree iterator. It yields before it descends into the
694
+ # child node and after (which is identical to before the next child
695
+ # descend). It yields the node, the position and the stack of parent
696
+ # nodes.
697
+ # @yield [node, position, stack]
698
def traverse
  # Non-recursive walk. The stack holds [ node, position ] pairs for all
  # nodes that still need to be (re)visited.
  stack = [ [ self, 0 ] ]

  until stack.empty?
    node, position = stack.pop

    # Position 0 means the node is entered for the first time. Position k
    # (k > 0) is reached after child k - 1 has been completed. The final
    # position is reported once all children are done, right before the
    # walk returns to the parent node.
    yield(node, position, stack)

    next if position > node.size

    # Come back to this node for the next position ...
    stack << [ node, position + 1 ]

    # ... after the child at the current position (if any) was walked.
    next if node.is_leaf?

    child = node.children[position]
    stack << [ child, 0 ] if child
  end
end
727
+
728
+ # Gather some statistics about the node and all sub nodes.
729
+ # @param stats [Stats] Data structure that stores the gathered data
730
def statistics(stats)
  traverse do |node, position, stack|
    # Count every node exactly once, when it is entered.
    next unless position == 0

    unless node.is_leaf?
      stats.branch_nodes += 1
      next
    end

    stats.leaf_nodes += 1
    # The stack holds the ancestors that are still being walked.
    depth = stack.size + 1
    # The minimum only ever shrinks and the maximum only ever grows.
    if stats.min_depth.nil? || depth < stats.min_depth
      stats.min_depth = depth
    end
    if stats.max_depth.nil? || depth > stats.max_depth
      stats.max_depth = depth
    end
  end
end
748
+
749
+ # Return the decoration that marks the tree structure of this node for the
750
+ # inspection method.
751
def tree_prefix
  prefix = ''
  current = self

  # Prepend one column per ancestor level. The top-level node itself does
  # not contribute a column.
  while current && (upper = current.parent)
    column = upper.children.last == current ? ' ' : ' |'
    prefix = column + prefix
    current = upper
  end

  prefix
end
770
+
771
+ # Branch node decoration for the inspection method.
772
def tree_branch_mark
  # Only nodes below the root get a connector.
  @parent ? '-' : ''
end
776
+
777
+ # Text for the node line for the inspection method.
778
def tree_summary
  summary = " @#{@_id}"

  # Each reference is rendered defensively so that a broken link shows up
  # as '@' instead of aborting the dump.
  if @parent
    summary += begin
      " +#{@parent.offsets[index_in_parent_node]} ^#{@parent._id}"
    rescue
      ' ^@'
    end
  end

  if @prev_sibling
    summary += begin
      " <#{@prev_sibling._id}"
    rescue
      ' <@'
    end
  end

  if @next_sibling
    summary += begin
      " >#{@next_sibling._id}"
    rescue
      ' >@'
    end
  end

  summary
end
804
+
805
+ # Print and log an error message for the node.
806
def error(msg)
  # Attach a dump of the whole tree to ease debugging.
  report = "Error in BigArray node @#{@_id}: #{msg}\n"
  report += @tree.to_s
  $stderr.puts report
  PEROBS.log.error report
end
811
+
812
+ # Print and log an error message for the node.
813
def fatal(msg)
  # Attach a dump of the whole tree to ease debugging.
  report = "Fatal error in BigArray node @#{@_id}: #{msg}\n"
  report += @tree.to_s
  $stderr.puts report
  PEROBS.log.fatal report
end
818
+
819
+ private
820
+
821
# @return [Integer] half of the tree's node size. Callers compare a node's
#   size against this value to detect underfull nodes.
def min_size
  upper_limit = @tree.node_size
  upper_limit / 2
end
824
+
825
+ # Move first element of successor to end of child node
826
+ # @param child_index [Integer] index of the child
827
def move_first_element_of_successor_to_child(child_index)
  succ_index = child_index + 1
  child = @children[child_index]
  succ = @children[succ_index]

  if child.is_leaf?
    # The successor starts one value later once its first value is gone.
    @offsets[succ_index] += 1
    child.values << succ.values.shift
    return
  end

  # Branch nodes. Example with this node holding the offsets [0 7]:
  #
  #   before: child offsets [0 4]     succ offsets [0 4 6 9]
  #   after:  child offsets [0 4 7]   succ offsets [0 2 5]
  #
  # and this node ending up with the offsets [0 11].
  #
  # Drop the leading 0 of the successor. The former 2nd offset is the
  # number of values that move over to the child.
  succ.offsets.shift
  delta = succ.offsets.first
  succ.offsets.map! { |o| o - delta }
  # The offset of the moved sub-tree inside the child is taken from the
  # successor's entry in this node.
  child.offsets << @offsets[succ_index]
  # The successor now starts 'delta' values later.
  @offsets[succ_index] += delta
  # Finally move the child reference itself and re-parent it.
  child.children << succ.children.shift
  child.children.last.parent = child
end
883
+
884
+ # Move last element of predecessor node to child
885
+ # @param child_index [Integer] index of the child
886
def move_last_element_of_predecessor_to_child(child_index)
  pred = @children[child_index - 1]
  child = @children[child_index]

  if child.is_leaf?
    # The child starts one value earlier once it received the value.
    @offsets[child_index] -= 1
    child.values.unshift(pred.values.pop)
    return
  end

  # Branch nodes. Example with this node holding the offsets [0 13]:
  #
  #   before: pred offsets [0 4 7 11]   child offsets [0 3]
  #   after:  pred offsets [0 4 7]      child offsets [0 2 5]
  #
  # and this node ending up with the offsets [0 11].
  #
  # The number of values below the last sub-tree of the predecessor is
  # the amount by which the child grows at its front.
  delta = pred.children.last.values_count
  @offsets[child_index] -= delta
  pred.offsets.pop
  # Existing sub-trees of the child move back by delta ...
  child.offsets.map! { |o| o + delta }
  # ... and the moved sub-tree becomes the new first entry.
  child.offsets.unshift(0)
  child.children.unshift(pred.children.pop)
  child.children.first.parent = child
end
940
+
941
# Merge the child at the given index with its successor. The successor's
# content is appended to the child and the successor is removed from this
# node.
# @param child_index [Integer] index of the child that absorbs its successor
def merge_child_with_next(child_index)
  next_index = child_index + 1
  c1 = @children[child_index]
  c2 = @children[next_index]

  if c1.is_leaf?
    # Take the successor out of the leaf sibling chain.
    c1.next_sibling = c2.next_sibling
    c1.next_sibling.prev_sibling = c1 if c1.next_sibling

    c1.values += c2.values
    # c1 may have become the last leaf of the tree.
    @tree.last_leaf = c1 unless c1.next_sibling
  else
    # Example: this node holds the offsets [0 11], c1 the offsets [0 4 7]
    # and c2 the offsets [0]. After the merge this node holds [0] and c1
    # holds [0 4 7 11].
    #
    # The distance between the two children converts c2's offsets into
    # offsets relative to c1.
    delta = @offsets[next_index] - @offsets[child_index]
    c1.offsets += c2.offsets.map { |o| o + delta }
    c2.children.each { |c| c.parent = c1 }
    c1.children += c2.children
  end

  # Remove the child successor from the node.
  @offsets.delete_at(next_index)
  @children.delete_at(next_index)

  # Losing an entry may have made this node underfull as well.
  if @parent && size < min_size
    @parent.consolidate_child_nodes(self)
  end
end
998
+
999
+ end
1000
+
1001
+ end
1002
+