perobs 4.0.0 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,1002 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArrayNode.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/Array'
31
+
32
+ module PEROBS
33
+
34
+ # The BigArrayNode class provides the BTree nodes for the BigArray objects.
35
+ # A node can either be a branch node or a leaf node. Branch nodes don't
36
+ # store values, only offsets and references to child nodes. Leaf nodes don't
37
+ # have child nodes but store the actual values. The leaf nodes always
38
+ # contain at least node_size / 2 number of consecutive values. The index of
39
+ # the first value in the BigArray is the sum of the offsets stored in the
40
+ # parent nodes. Branch nodes store the offsets and the corresponding
41
+ # child node references. The first offset is always 0. Consecutive offsets
42
+ # are set to the previous offset plus the total number of values stored in
43
+ # the previous child node. The leaf nodes don't contain holes. A
44
+ # concatenation of all leaf node values represents the stored Array.
45
+ #
46
+ # Root Node +--------------------------------+
47
+ # Offsets | 0 11 |
48
+ # Children | |
49
+ # v v
50
+ # Level 1 +--------------------------++--------------------------+
51
+ # Offsets | 0 4 7 || 0 2 5 |
52
+ # Children | | | | | |
53
+ # v v v v v v
54
+ # Leaves +---------++-------++----------++-------++----------++-------+
55
+ # Values | A B C D || E F G || H I J K || L M || N O P || Q R |
56
+ #
57
+ # Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
58
+ #
59
+ class BigArrayNode < PEROBS::Object
60
+
61
+ attr_persist :tree, :parent, :offsets, :values, :children,
62
+ :prev_sibling, :next_sibling
63
+
64
# Internal constructor. Use Store.new(BigArrayNode, ...) instead of calling
# it directly.
# @param p [Handle] handle that is passed on to the super class constructor
# @param tree [BigArray] The tree this node should belong to
# @param is_leaf [Boolean] True if a leaf node should be created, false
#        for a branch node.
# @param parent [BigArrayNode] Parent node
# @param prev_sibling [BigArrayNode] Previous sibling (leaf nodes only)
# @param next_sibling [BigArrayNode] Next sibling (leaf nodes only)
def initialize(p, tree, is_leaf, parent = nil,
               prev_sibling = nil, next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent

  unless is_leaf
    # Branch node: it keeps offsets and child references but no values and
    # it does not take part in the sibling chain.
    self.offsets = @store.new(PEROBS::Array)
    self.children = @store.new(PEROBS::Array)
    self.values = nil
    self.next_sibling = nil
    self.prev_sibling = nil
    return
  end

  # Leaf node: it keeps values and has neither offsets nor children.
  self.values = @store.new(PEROBS::Array)
  self.offsets = nil
  self.children = nil

  # Hook the new leaf into the sibling chain. A missing neighbor means the
  # node is the new first respectively last leaf of the tree.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  else
    @tree.first_leaf = myself
  end

  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  else
    @tree.last_leaf = myself
  end
end
106
+
107
# A node without a children list is a leaf node.
# @return [Boolean] True if this is a leaf node, false otherwise.
def is_leaf?
  @children.nil?
end
111
+
112
# @return [Integer] number of values (leaf node) or number of child nodes
#   (branch node) that this node holds directly.
def size
  return @values.size if is_leaf?

  @children.size
end
115
+
116
# Count the values stored below this node. The method follows the last
# child of every branch node and adds up the last offsets until it reaches
# a leaf, whose value count completes the sum.
# @return [Integer] the number of values stored in this node.
def values_count
  total = 0
  node = self

  while node
    return total + node.values.size if node.is_leaf?

    total += node.offsets.last
    node = node.children.last
  end
end
129
+
130
+
131
# Replace the value at the given index. The index must refer to an already
# existing entry.
# @param index [Integer] Position of the value to replace, relative to the
#        first value stored below this node
# @param value [Object] the new value
def set(index, value)
  node = self

  # Walk down the tree until the leaf node that holds the index is found.
  while node
    if node.is_leaf?
      if index >= node.values.size
        node.fatal "Set index (#{index}) larger than values array " +
                   "(#{node.values.size})."
      end
      node.values[index] = value
      return
    end

    # Descend into the child that covers the index. The index is made
    # relative to the first value of that child.
    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # We only get here when a branch node had no child at the computed
  # position. 'node' is nil at this point, so the problem must be reported
  # via this node.
  fatal "Could not find proper node to set the value while " +
        "looking for index #{index}"
end
160
+
161
# Insert the given value at the given index. All following values will be
# pushed to a higher index.
# @param index [Integer] Position to insert at, relative to the first
#        value stored below this node
# @param value [Object] value to insert
def insert(index, value)
  node = self
  cidx = nil

  # Walk down the tree until the leaf node for the index is found.
  while node
    # Every full node that we pass on the way down is split into two
    # half-full nodes first.
    if node.size >= @tree.node_size
      # split_node returns the parent. Re-add the offset of the last
      # descent so the index is relative to the parent again.
      index += node.parent.offsets[cidx] if node.parent
      node = node.split_node
    end

    if node.is_leaf?
      node.values.insert(index, value)
      node.parent.adjust_offsets(node, 1) if node.parent
      return
    end

    # Descend into the child that covers the index. The index is made
    # relative to the first value of that child.
    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # We only get here when a branch node had no child at the computed
  # position. 'node' is nil at this point, so the problem must be reported
  # via this node.
  fatal "Could not find proper node to insert the value while " +
        "looking for index #{index}"
end
198
+
199
# Return the value stored at the given index. For an index beyond the
# values of the matching leaf node nil is returned.
# @param index [Integer] Position of the value, relative to the first value
#        stored below this node
# @return [Object or nil] the value found at the index
def get(index)
  current = self

  # Walk down the tree until we reach the leaf node that holds the index.
  while current
    return current.values[index] if current.is_leaf?

    # Pick the last child whose offset is not larger than the index and
    # make the index relative to the first value of that child.
    offsets = current.offsets
    first_larger = offsets.bsearch_index { |o| o > index }
    cidx = (first_larger || offsets.length) - 1
    index -= offsets[cidx]
    current.fatal "Index (#{index}) became negative" if index < 0
    current = current.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for index #{index}"
end
225
+
226
# Delete the element at the specified index, returning that element, or
# nil if the index is out of range. When the affected leaf has a parent,
# the offsets of the ancestors are corrected and a leaf that became too
# small is consolidated with a neighbor.
# @param index [Integer] Index relative to the first value stored below
#        this node
# @return [Object] found value or nil
def delete_at(index)
  current = self

  while current
    if current.is_leaf?
      removed = current.values.delete_at(index)
      owner = current.parent
      if owner
        owner.adjust_offsets(current, -1)
        owner.consolidate_child_nodes(current) if current.size < min_size
      end

      return removed
    end

    # Pick the last child whose offset is not larger than the index and
    # make the index relative to the first value of that child.
    offsets = current.offsets
    first_larger = offsets.bsearch_index { |o| o > index }
    cidx = (first_larger || offsets.length) - 1
    index -= offsets[cidx]
    current.fatal "Index (#{index}) became negative" if index < 0
    current = current.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to delete from while " +
                   "looking for index #{index}"
end
259
+
260
# Iterate over all the values of a leaf node. For branch nodes nothing is
# yielded and nil is returned.
# @yield [value]
def each
  return unless is_leaf?

  @values.each { |value| yield(value) }
end
269
+
270
# Iterate over all the values of a leaf node in reverse order. For branch
# nodes nothing is yielded and nil is returned.
# @yield [value]
def reverse_each
  return unless is_leaf?

  @values.reverse_each { |value| yield(value) }
end
279
+
280
# Check consistency of the node and all subsequent nodes. In case an error
# is found, a message is logged and false is returned.
# @yield [index, value] Optional block that is called for every value of
#        the visited leaf nodes. When the block returns a false value the
#        check is aborted and false is returned.
# @return [Boolean] true if tree has no errors
def check
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # Nodes should have between min_size() and @tree.node_size children
      # or values. Only the root node may have less.
      if node.size > @tree.node_size
        node.error "BigArray node #{node._id} is too large. It has " +
                   "#{node.size} nodes instead of max. #{@tree.node_size}."
        return false
      end
      if node.parent && node.size < min_size
        node.error "BigArray node #{node._id} is too small"
        return false
      end

      if node.is_leaf?
        # All leaf nodes must have same distance from root node. The first
        # leaf we see defines the reference depth.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end

        return false unless node.check_leaf_node_links

        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        unless node.children.size == node.offsets.size
          node.error "Offset count (#{node.offsets.size}) must be equal " +
                     "to children count (#{node.children.size})"
          return false
        end

        if node.values
          node.error "values must be nil for a branch node"
          return false
        end

        # The sibling links of the visited node (not of the node the check
        # was started on) must be unset for branch nodes.
        unless node.prev_sibling.nil? && node.next_sibling.nil?
          node.error "prev_sibling and next_sibling must be nil for " +
                     "branch nodes"
          return false
        end

        return false unless node.check_offsets

        return false unless node.check_child_nodes(stack)
      end
    elsif position <= node.size
      # Positions 1 to size refer to the entry with index 'position - 1'.
      index = position - 1
      if node.is_leaf? && block_given?
        # If a block was given, call this block with the index and value.
        return false unless yield(node.first_index + index,
                                  node.values[index])
      end
    end
  end

  true
end
356
+
357
# Verify the sibling links of this leaf node. A node without a neighbor on
# one side must be registered as first respectively last leaf of the tree.
# An existing neighbor must link back to this node.
# @return [Boolean] true if the links are consistent, false otherwise
def check_leaf_node_links
  if @prev_sibling.nil? && @tree.first_leaf != self
    error "Leaf node #{@_id} has no previous sibling " +
          "but is not the first leaf of the tree"
    return false
  end
  if !@prev_sibling.nil? && @prev_sibling.next_sibling != self
    error "next_sibling of previous sibling does not point to " +
          "this node"
    return false
  end

  if @next_sibling.nil? && @tree.last_leaf != self
    error "Leaf node #{@_id} has no next sibling " +
          "but is not the last leaf of the tree"
    return false
  end
  if !@next_sibling.nil? && @next_sibling.prev_sibling != self
    error "previous_sibling of next sibling does not point to " +
          "this node"
    return false
  end

  true
end
384
+
385
# Verify the offsets of this branch node. The first offset must be 0 and
# every following offset must be the previous offset plus the number of
# values stored below the previous child. A root node without any offsets
# is accepted.
# @return [Boolean] true if the offsets are consistent, false otherwise
def check_offsets
  return true if @parent.nil? && @offsets.empty?

  if @offsets[0] != 0
    error "First offset is not 0: #{@offsets.inspect}"
    return false
  end

  1.upto(@offsets.size - 1) do |i|
    previous = @offsets[i - 1]
    current = @offsets[i]

    if current < previous
      error "Offsets are not strictly monotoneously " +
            "increasing: #{@offsets.inspect}"
      return false
    end

    expected_offset = previous + @children[i - 1].values_count
    if current != expected_offset
      error "Offset #{i} must be #{expected_offset} " +
            "but is #{current}."
      return false
    end
  end

  true
end
414
+
415
# Verify the child references of this branch node. Every child must be a
# unique BigArrayNode whose parent reference points back to this node. A
# child must be neither this node itself nor one of its ancestors.
# @param stack [Array] Ancestors of this node. The entries may either be
#        nodes or the [node, position] pairs that traverse() yields.
# @return [Boolean] true if all children are consistent, false otherwise
def check_child_nodes(stack)
  if @children.uniq.size != @children.size
    error "Node #{@_id} has multiple identical children"
    return false
  end

  @children.each_with_index do |child, i|
    unless child.is_a?(BigArrayNode)
      error "Child #{i} is of class #{child.class} " +
            "instead of BigArrayNode"
      return false
    end

    unless child.parent.is_a?(BigArrayNode)
      error "Parent reference of child #{i} is of class " +
            "#{child.parent.class} instead of BigArrayNode"
      return false
    end

    if child.parent != self
      error "Child node #{child._id} has wrong parent " +
            "#{child.parent._id}. It should be #{@_id}."
      return false
    end

    if child == self
      error "Child #{i} point to self"
      return false
    end

    # traverse() hands out its work stack which holds [node, position]
    # pairs. The node must be unpacked before it can be compared.
    is_ancestor = stack.any? do |entry|
      child == (entry.is_a?(::Array) ? entry.first : entry)
    end
    if is_ancestor
      error "Child #{i} points to ancester node"
      return false
    end
  end

  true
end
459
+
460
# Render the sub-tree below this node as text. Every node contributes a
# summary line and every leaf value a line of its own. Exceptions that are
# raised while a line is generated are turned into a marker line.
# @return [String] Human readable form of the sub-tree.
def to_s
  text = ''

  traverse do |node, position, _stack|
    begin
      if position == 0
        lead = node.parent ? node.parent.tree_prefix + ' +' : 'o'
        mark = node.tree_branch_mark
        fill = node.size == 0 ? '--' : 'v-'
        text += "#{lead}#{mark}-#{fill}#{node.tree_summary}\n"
      elsif node.is_leaf? && position <= node.size
        prefix = node.tree_prefix
        bullet = position == node.size ? '-' : '|'
        array_index = node.value_index(position - 1)
        shown = node.values[position - 1].nil? ?
          'nil' : node.values[position - 1]
        text += "#{prefix} #{bullet} [ #{array_index}: #{shown} ]\n"
      end
    rescue => e
      text += "@@@@@@@@@@: #{e.message}\n"
    end
  end

  text
end
492
+
493
# Split the current node into two nodes. The upper half of the elements
# will be moved into a newly created sibling node that is inserted right
# after this node into the parent. This node retains the lower half. If
# this node is the root node, a new root node is created first.
#
# Leaf example: a parent with the offsets [0, 4, 7] whose first child holds
# the values A B C D ends up with the offsets [0, 2, 4, 7] and the first
# two children A B and C D.
#
# Branch example: a node with the offsets [0, 4, 7, 10] at parent offset 0
# is split into nodes with the offsets [0, 4] and [0, 3]. The new sibling
# is registered with the offset 7 in the parent.
# @return [BigArrayNode] common parent of the two nodes
def split_node
  if @parent.nil?
    # The node is the root node. We need to create a parent node first.
    self.parent = @store.new(BigArrayNode, @tree, false)
    @parent.offsets[0] = 0
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The new sibling takes over the 2nd half of the node content.
  sibling = @store.new(BigArrayNode, @tree, is_leaf?, @parent, myself,
                       @next_sibling)
  # Index of the first element that is moved to the sibling.
  half = size / 2
  upper = half..-1

  if is_leaf?
    # The sibling starts 'half' values after this node.
    @parent.insert_child_after_peer(half, sibling, self)
    # Copy the upper values over and drop them from this node.
    sibling.values += @values[upper]
    @values.slice!(upper)
  else
    # The offset of the first moved child determines where the sibling
    # starts relative to this node.
    base = @offsets[half]
    @parent.insert_child_after_peer(base, sibling, self)
    # The moved offsets must be relative to the sibling.
    sibling.offsets += @offsets[upper].map { |o| o - base }
    @offsets.slice!(upper)
    # Move the children and attach them to their new parent.
    sibling.children += @children[upper]
    sibling.children.each { |c| c.parent = sibling }
    @children.slice!(upper)
  end

  @parent
end
577
+
578
# Insert a new child node directly after the given peer node.
# @param offset [Integer] Offset of the new node relative to the offset of
#        the peer node
# @param node [BigArrayNode] the child node to insert
# @param peer [BigArrayNode or nil] Existing child that should precede the
#        new node. Without a peer the node is inserted as first child and
#        the offset is taken as is.
def insert_child_after_peer(offset, node, peer = nil)
  peer_index = @children.find_index(peer)
  if peer && peer_index.nil?
    # A peer that is not one of our children used to end in a TypeError.
    fatal "Cannot find peer node to insert the child after"
    return
  end

  cidx = peer_index ? peer_index + 1 : 0
  base = peer_index ? @offsets[peer_index] : 0
  @offsets.insert(cidx, base + offset)
  @children.insert(cidx, node)
end
584
+
585
# Re-balance a child node that has become too small. The child is merged
# with a neighbor if both fit into a single node. Otherwise one element of
# the neighbor is moved over. The first child is consolidated with its
# successor, all other children with their predecessor.
# @param child [BigArrayNode] the child node that has become too small
def consolidate_child_nodes(child)
  unless (child_index = @children.index(child))
    error "Cannot find child to consolidate"
    # Without an index there is nothing we could consolidate.
    return
  end

  if child_index == 0
    # Consolidate with successor if it exists.
    return unless (succ = @children[child_index + 1])

    if child.size + succ.size <= @tree.node_size
      # merge child with successor
      merge_child_with_next(child_index)
    else
      move_first_element_of_successor_to_child(child_index)
    end
  else
    # consolidate with predecessor
    pred = @children[child_index - 1]

    if pred.size + child.size <= @tree.node_size
      # merge child with predecessor
      merge_child_with_next(child_index - 1)
    else
      move_last_element_of_predecessor_to_child(child_index)
    end
  end
end
612
+
613
# Find the child that covers the given offset. That is the last child
# whose offset is not larger than the given offset.
# @param offset [Integer] offset to search the child index for
# @return [Integer] Index of the matching offset or the insert position.
def search_child_index(offset)
  # Without offsets or for anything up to the first offset the first
  # position is the answer.
  return 0 if @offsets.empty?
  return 0 if offset <= @offsets.first

  first_larger = @offsets.bsearch_index { |o| o > offset }
  first_larger.nil? ? @offsets.length - 1 : first_larger - 1
end
621
+
622
# @return [Integer or nil] The index of the current node in the children
#   list of the parent node. If the node is the root node, nil is returned.
def index_in_parent_node
  @parent ? @parent.children.find_index(self) : nil
end
629
+
630
# Compute the BigArray index of the first value stored below this node.
# This is the sum of the offsets under which this node and all of its
# ancestors are registered in their respective parent nodes.
# @return [Integer] index of the first value of this node
def first_index
  # TODO: This is a very expensive method. Find a way to make this way
  # faster.
  total = 0
  child = myself
  node = parent

  while node
    # Every level contributes the offset of the child we came from.
    if (index = node.children.index(child))
      total += node.offsets[index]
    end
    child = node
    node = node.parent
  end

  total
end
645
+
646
# Translate an index that is relative to this node into the index of the
# whole BigArray by adding the offsets of this node and all its ancestors.
# @param idx [Integer] Index of the value in the current node
# @return [Integer] Array index of the value
def value_index(idx)
  node = self

  while (owner = node.parent)
    idx += owner.offsets[node.index_in_parent_node]
    node = owner
  end

  idx
end
659
+
660
# Correct the offsets of the tree after elements were inserted into or
# removed from a leaf node. In this node and in every ancestor the offsets
# of all children that follow the modified branch are changed by delta.
# The offset of the modified child itself stays untouched.
# @param after_child [BigArrayNode] specifies the modified leaf node
# @param delta [Integer] specifies how many elements were inserted or
#        removed.
def adjust_offsets(after_child, delta)
  node = self

  while node
    kids = node.children
    # Position of the child that the modification happened in.
    hit = (0...kids.size).find { |i| kids[i] == after_child }

    if hit
      # All entries behind that child move by delta.
      (hit + 1).upto(kids.size - 1) { |i| node.offsets[i] += delta }
    else
      node.fatal "Could not find child #{after_child._id}"
    end

    # Continue one level up with this node as the modified child.
    after_child = node
    node = node.parent
  end
end
692
+
693
# This is a generic, non-recursive tree iterator. For every node it yields
# once per position. Position 0 means the node has just been entered and
# the first child is about to be visited. Position N is reached after the
# N-th child (or value) has been processed. The last position that is
# yielded for a node is its size plus 1.
# @yield [node, position, stack] The stack is the internal list of the
#        [node, position] pairs that still have to be processed. It
#        contains one entry for every ancestor of the yielded node.
def traverse
  # Work list of the nodes and positions that still need to be visited.
  pending = [ [ self, 0 ] ]

  until pending.empty?
    node, position = pending.pop

    yield(node, position, pending)

    # Nothing is left to do for this node once we are past its last entry.
    next if position > node.size

    # Come back to this node at the next position later on.
    pending.push([ node, position + 1 ])
    next if node.is_leaf?

    # Visit the child at this position first if there is one.
    child = node.children[position]
    pending.push([ child, 0 ]) if child
  end
end
727
+
728
# Gather some statistics about the node and all sub nodes. Leaf and branch
# nodes are counted and the smallest and largest depth of the leaf nodes
# are recorded. The depth of a leaf directly at the traversal root is 1.
# @param stats [Stats] Data structure that stores the gathered data. It
#        must provide the accessors leaf_nodes, branch_nodes, min_depth and
#        max_depth.
def statistics(stats)
  traverse do |node, position, stack|
    # Every node is counted once when it is entered.
    next unless position == 0

    unless node.is_leaf?
      stats.branch_nodes += 1
      next
    end

    stats.leaf_nodes += 1
    depth = stack.size + 1
    if stats.min_depth.nil? || depth < stats.min_depth
      stats.min_depth = depth
    end
    if stats.max_depth.nil? || depth > stats.max_depth
      stats.max_depth = depth
    end
  end
end
748
+
749
# Return the decoration that marks the tree structure of this node for the
# inspection method. Every level below the root contributes one segment,
# with the segment of the top-most level coming first. The segment differs
# for nodes that are the last child of their parent.
# @return [String] the prefix, empty for the root node
def tree_prefix
  prefix = ''
  node = self

  # The top-level node does not contribute a segment.
  while node && node.parent
    last_child = node.parent.children.last == node
    prefix = (last_child ? ' ' : ' |') + prefix
    node = node.parent
  end

  prefix
end
770
+
771
# Branch node decoration for the inspection method.
# @return [String] '-' for nodes that have a parent, '' for the root node
def tree_branch_mark
  @parent ? '-' : ''
end
776
+
777
# Text for the node line for the inspection method. It lists the ID of the
# node followed by the offset in and the ID of the parent node and the IDs
# of the previous and next sibling as far as they are set. A reference
# that cannot be resolved is shown as '@'.
# @return [String] summary of the node
def tree_summary
  parts = [ " @#{@_id}" ]

  if @parent
    parts << begin
      " +#{@parent.offsets[index_in_parent_node]} ^#{@parent._id}"
    rescue
      ' ^@'
    end
  end

  if @prev_sibling
    parts << begin
      " <#{@prev_sibling._id}"
    rescue
      ' <@'
    end
  end

  if @next_sibling
    parts << begin
      " >#{@next_sibling._id}"
    rescue
      ' >@'
    end
  end

  parts.join
end
804
+
805
# Print and log an error message for the node. The text form of the tree
# is appended to the message.
# @param msg [String] description of the problem
def error(msg)
  report = "Error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.error report
end
811
+
812
# Print and log a fatal error message for the node. The text form of the
# tree is appended to the message.
# @param msg [String] description of the problem
def fatal(msg)
  report = "Fatal error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.fatal report
end
818
+
819
+ private
820
+
821
# @return [Integer] Smallest number of entries a node other than the root
#   node should hold, which is half of the node size of the tree.
def min_size
  limit = @tree.node_size
  limit / 2
end
824
+
825
# Move first element of successor to end of child node. For leaf nodes a
# value is moved, for branch nodes a child node reference.
#
# Branch example with child and successor being the two children of the
# root node:
#
#             root offsets   child offsets   successor offsets
#   Before:   0 7            0 4             0 4 6 9
#   After:    0 11           0 4 7           0 2 5
#
# @param child_index [Integer] index of the child
def move_first_element_of_successor_to_child(child_index)
  child = @children[child_index]
  succ = @children[child_index + 1]

  if child.is_leaf?
    # The successor starts one value later now.
    @offsets[child_index + 1] += 1
    # Move the value
    child.values << succ.values.shift
  else
    # Number of values below the child. This is the offset of the moved
    # node within the child. It must be relative to the child, so our own
    # offset of the child is subtracted.
    child_count = @offsets[child_index + 1] - @offsets[child_index]
    # Drop the offset of the moved node. The 2nd original offset is the
    # number of values that leave the successor.
    succ.offsets.shift
    delta = succ.offsets.first
    succ.offsets.map! { |o| o - delta }
    child.offsets << child_count
    # The successor starts delta values later now.
    @offsets[child_index + 1] += delta
    # Move the child reference
    child.children << succ.children.shift
    child.children.last.parent = child
  end
end
883
+
884
# Move last element of predecessor node to the front of the child node.
# For leaf nodes a value is moved, for branch nodes a child node
# reference.
#
# Branch example with predecessor and child being the two children of the
# root node:
#
#             root offsets   predecessor offsets   child offsets
#   Before:   0 13           0 4 7 11              0 3
#   After:    0 11           0 4 7                 0 2 5
#
# @param child_index [Integer] index of the child
def move_last_element_of_predecessor_to_child(child_index)
  child = @children[child_index]
  pred = @children[child_index - 1]

  if child.is_leaf?
    # The child starts one value earlier now.
    @offsets[child_index] -= 1
    # Move the value
    child.values.unshift(pred.values.pop)
  else
    # The number of values below the moved node determines by how much
    # the child grows at its front.
    shift_by = pred.children.last.values_count
    @offsets[child_index] -= shift_by
    pred.offsets.pop
    # The existing entries of the child move back to make room for the
    # new first entry with the offset 0.
    child.offsets.map! { |o| o + shift_by }
    child.offsets.unshift(0)
    # Move the child reference
    child.children.unshift(pred.children.pop)
    child.children.first.parent = child
  end
end
940
+
941
# Merge the child at the given index with its successor. The content of
# the successor is appended to the child and the successor is removed from
# this node. If this node becomes too small by that, the parent node is
# asked to consolidate this node as well.
#
# Branch example: the children with the offsets [0, 4, 7] and [0] that are
# registered with the offsets 0 and 11 become a single child with the
# offsets [0, 4, 7, 11] that is registered with the offset 0.
#
# @param child_index [Integer] index of the child that takes over the
#        content of its successor
def merge_child_with_next(child_index)
  survivor = @children[child_index]
  absorbed = @children[child_index + 1]

  if survivor.is_leaf?
    # Take the absorbed node out of the sibling chain. Without a follower
    # the survivor becomes the last leaf of the tree.
    survivor.next_sibling = absorbed.next_sibling
    if survivor.next_sibling
      survivor.next_sibling.prev_sibling = survivor
    else
      @tree.last_leaf = survivor
    end

    survivor.values += absorbed.values
  else
    # The offsets of the absorbed node must be made relative to the
    # survivor. The distance of the two nodes in this node is the number
    # of values that the survivor held so far.
    shift = @offsets[child_index + 1] - @offsets[child_index]
    survivor.offsets += absorbed.offsets.map { |o| o + shift }
    absorbed.children.each { |c| c.parent = survivor }
    survivor.children += absorbed.children
  end

  # Remove the child successor from the node.
  @offsets.delete_at(child_index + 1)
  @children.delete_at(child_index + 1)

  if @parent && size < min_size
    @parent.consolidate_child_nodes(self)
  end
end
998
+
999
+ end
1000
+
1001
+ end
1002
+