perobs 4.0.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,1002 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigArrayNode.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017, 2018, 2019
|
6
|
+
# by Chris Schlaeger <chris@taskjuggler.org>
|
7
|
+
#
|
8
|
+
# MIT License
|
9
|
+
#
|
10
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
11
|
+
# a copy of this software and associated documentation files (the
|
12
|
+
# "Software"), to deal in the Software without restriction, including
|
13
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
14
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
15
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
16
|
+
# the following conditions:
|
17
|
+
#
|
18
|
+
# The above copyright notice and this permission notice shall be
|
19
|
+
# included in all copies or substantial portions of the Software.
|
20
|
+
#
|
21
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
22
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
23
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
24
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
25
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
26
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
27
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
28
|
+
|
29
|
+
require 'perobs/Object'
|
30
|
+
require 'perobs/Array'
|
31
|
+
|
32
|
+
module PEROBS
|
33
|
+
|
34
|
+
# The BigArrayNode class provides the BTree nodes for the BigArray objects.
|
35
|
+
# A node can either be a branch node or a leaf node. Branch nodes don't
|
36
|
+
# store values, only offsets and references to child nodes. Leaf nodes don't
|
37
|
+
# have child nodes but store the actual values. The leaf nodes always
|
38
|
+
# contain at least node_size / 2 number of consecutive values. The index of
|
39
|
+
# the first value in the BigArray is the sum of the offsets stored in the
|
40
|
+
# parent nodes. Branch nodes store the offsets and the corresponding
|
41
|
+
# child node references. The first offset is always 0. Consecutive offsets
|
42
|
+
# are set to the previous offset plus the total number of values stored in
|
43
|
+
# the previous child node. The leaf nodes don't contain holes. A
|
44
|
+
# concatenation of all leaf node values represents the stored Array.
|
45
|
+
#
|
46
|
+
# Root Node +--------------------------------+
|
47
|
+
# Offsets | 0 11 |
|
48
|
+
# Children | |
|
49
|
+
# v v
|
50
|
+
# Level 1 +--------------------------++--------------------------+
|
51
|
+
# Offsets | 0 4 7 || 0 2 5 |
|
52
|
+
# Children | | | | | |
|
53
|
+
# v v v v v v
|
54
|
+
# Leaves +---------++-------++----------++-------++----------++-------+
|
55
|
+
# Values | A B C D || E F G || H I J K || L M || N O P || Q R |
|
56
|
+
#
|
57
|
+
# Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
|
58
|
+
#
|
59
|
+
class BigArrayNode < PEROBS::Object
|
60
|
+
|
61
|
+
attr_persist :tree, :parent, :offsets, :values, :children,
|
62
|
+
:prev_sibling, :next_sibling
|
63
|
+
|
64
|
+
# Internal constructor. Use Store.new(BigArrayNode, ...) instead.
|
65
|
+
# @param p [Handle]
|
66
|
+
# @param tree [BigArray] The tree this node should belong to
|
67
|
+
# @param is_leaf [Boolean] True if a leaf node should be created, false
|
68
|
+
# for a branch node.
|
69
|
+
# @param parent [BigArrayNode] Parent node
|
70
|
+
# @param prev_sibling [BigArrayNode] Previous sibling
|
71
|
+
# @param next_sibling [BigArrayNode] Next sibling
|
72
|
+
def initialize(p, tree, is_leaf, parent = nil,
               prev_sibling = nil, next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent

  unless is_leaf
    # Branch node: it holds offsets and child references, never values,
    # and it takes no part in the sibling chain.
    self.offsets = @store.new(PEROBS::Array)
    self.children = @store.new(PEROBS::Array)
    self.values = nil
    self.prev_sibling = self.next_sibling = nil
    return
  end

  # Leaf node: it holds values only.
  self.values = @store.new(PEROBS::Array)
  self.children = self.offsets = nil

  # Hook the new leaf into the sibling chain. A missing neighbour on
  # either side means this leaf becomes the first or last leaf of the
  # tree, which is recorded on the BigArray object.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  else
    @tree.first_leaf = myself
  end

  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  else
    @tree.last_leaf = myself
  end
end
|
106
|
+
|
107
|
+
# @return [Boolean] True if this is a leaf node, false otherwise.
|
108
|
+
def is_leaf?
  # The constructor sets the children list only for branch nodes, so a
  # missing list identifies a leaf.
  no_children = @children.nil?
  no_children
end
|
111
|
+
|
112
|
+
def size
  # Number of entries held directly by this node: values for a leaf,
  # child references for a branch.
  entries = is_leaf? ? @values : @children
  entries.size
end
|
115
|
+
|
116
|
+
# @return [Integer] the number of values stored in this node.
|
117
|
+
def values_count
  # Walk down the right-most path: the last offset of each branch level
  # is the number of values stored left of the last child, so adding the
  # size of the right-most leaf gives the total for this sub-tree.
  total = 0
  current = self
  while current
    return total + current.values.size if current.is_leaf?

    total += current.offsets.last
    current = current.children.last
  end
end
|
129
|
+
|
130
|
+
|
131
|
+
# Set the given value at the given index.
|
132
|
+
# @param index [Integer] Position to insert at
|
133
|
+
# @param value [Integer] value to insert
|
134
|
+
def set(index, value)
  # Replace the value stored at +index+ (relative to this sub-tree).
  # Problems are reported through #fatal; nothing is returned.
  node = self

  # Walk down the tree until the leaf holding the index is reached.
  while node
    if node.is_leaf?
      if index >= node.values.size
        node.fatal "Set index (#{index}) larger than values array " +
                   "(#{node.values.size})."
      end
      node.values[index] = value
      return
    end

    # Pick the child covering the index and make the index relative to it.
    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # The loop only ends when a child reference was nil, so +node+ is nil
  # here. Report through this node instead of calling #fatal on nil.
  fatal "Could not find proper node to set the value while " +
        "looking for index #{index}"
end
|
160
|
+
|
161
|
+
# Insert the given value at the given index. All following values will be
|
162
|
+
# pushed to a higher index.
|
163
|
+
# @param index [Integer] Position to insert at
|
164
|
+
# @param value [Integer] value to insert
|
165
|
+
def insert(index, value)
  # Insert +value+ at +index+ (relative to this sub-tree), shifting all
  # following values up by one. Full nodes met on the way down are split.
  node = self
  cidx = nil

  while node
    # Split every full node on the path into two half-full nodes.
    if node.size >= @tree.node_size
      # split_node returns the common parent, so the search continues one
      # level up. Undo the offset subtraction of the previous step so the
      # index is relative to that parent again.
      index += node.parent.offsets[cidx] if node.parent
      node = node.split_node
    end

    if node.is_leaf?
      node.values.insert(index, value)
      # All following offsets in the ancestors grow by one.
      node.parent.adjust_offsets(node, 1) if node.parent
      return
    end

    # Pick the child covering the index and make the index relative to it.
    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # The loop only ends when a child reference was nil, so +node+ is nil
  # here. Report through this node instead of calling #fatal on nil.
  fatal "Could not find proper node to insert the value while " +
        "looking for index #{index}"
end
|
198
|
+
|
199
|
+
# Return the value that matches the given key or return nil if the key is
|
200
|
+
# unknown.
|
201
|
+
# @param index [Integer] Position to read from
|
202
|
+
# @return [Integer or nil] value that matches the key
|
203
|
+
def get(index)
  # Look up the value stored at +index+ (relative to this sub-tree) by
  # descending from this node to the leaf that covers the index.
  current = self

  while current
    # A leaf holds the values directly.
    return current.values[index] if current.is_leaf?

    # The wanted child is the last one whose offset is not larger than
    # the index.
    upper = current.offsets.bsearch_index { |o| o > index }
    slot = (upper || current.offsets.length) - 1
    index -= current.offsets[slot]
    current.fatal "Index (#{index}) became negative" if index < 0
    current = current.children[slot]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for index #{index}"
end
|
225
|
+
|
226
|
+
# Delete the element at the specified index, returning that element, or
|
227
|
+
# nil if the index is out of range.
|
228
|
+
# @param index [Integer] Index in the BigArray
|
229
|
+
# @return [Object] found value or nil
|
230
|
+
def delete_at(index)
  # Remove and return the value at +index+ (relative to this sub-tree).
  # Returns nil without touching the tree when the index is out of range.
  node = self

  while node
    if node.is_leaf?
      size_before = node.values.size
      deleted_value = node.values.delete_at(index)
      # A stored nil and an out-of-range index both yield nil, so use the
      # size to tell whether a value was actually removed. If nothing was
      # removed the offsets must stay as they are.
      return deleted_value if node.values.size == size_before

      if node.parent
        # All following offsets in the ancestors shrink by one.
        node.parent.adjust_offsets(node, -1)
        # Let the parent merge or rebalance leaves that became too small.
        node.parent.consolidate_child_nodes(node) if node.size < min_size
      end

      return deleted_value
    end

    # Pick the child covering the index and make the index relative to it.
    cidx = (node.offsets.bsearch_index { |o| o > index } ||
            node.offsets.length) - 1
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to delete from while " +
                   "looking for index #{index}"
end
|
259
|
+
|
260
|
+
# Iterate over all the values of the node.
|
261
|
+
# @yield [value]
|
262
|
+
def each
  # Only leaf nodes hold values; a branch node yields nothing.
  return nil unless is_leaf?

  @values.each { |value| yield(value) }
end
|
269
|
+
|
270
|
+
# Iterate over all the values of the node in reverse order.
|
271
|
+
# @yield [value]
|
272
|
+
def reverse_each
  # Only leaf nodes hold values; a branch node yields nothing.
  return nil unless is_leaf?

  @values.reverse_each { |value| yield(value) }
end
|
279
|
+
|
280
|
+
# Check consistency of the node and all subsequent nodes. In case an error
|
281
|
+
# is found, a message is logged and false is returned.
|
282
|
+
# @yield [key, value]
|
283
|
+
# @return [Boolean] true if tree has no errors
|
284
|
+
def check
  # Validate this node and every node below it. The first problem found
  # is logged through the offending node's #error and false is returned.
  # When a block is given it is called with (index, value) for every
  # stored value; a falsy block result also makes the check fail.
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # Nodes should have between min_size() and @tree.node_size children
      # or values. Only the root node may have less.
      if node.size > @tree.node_size
        node.error "BigArray node #{node._id} is too large. It has " +
                   "#{node.size} nodes instead of max. #{@tree.node_size}."
        return false
      end
      if node.parent && node.size < min_size
        node.error "BigArray node #{node._id} is too small"
        return false
      end

      if node.is_leaf?
        # All leaf nodes must have same distance from root node.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end

        return false unless node.check_leaf_node_links

        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        unless node.children.size == node.offsets.size
          node.error "Offset count (#{node.offsets.size}) must be equal " +
                     "to children count (#{node.children.size})"
          return false
        end

        if node.values
          node.error "values must be nil for a branch node"
          return false
        end

        # Inspect the traversed node's links, not those of the node that
        # #check was called on, and treat a violation as a failure.
        unless node.prev_sibling.nil? && node.next_sibling.nil?
          node.error "prev_sibling and next_sibling must be nil for " +
                     "branch nodes"
          return false
        end

        return false unless node.check_offsets

        return false unless node.check_child_nodes(stack)
      end
    elsif position <= node.size
      # Positions 1..size correspond to the entries with index
      # 'position - 1'.
      index = position - 1
      if node.is_leaf? && block_given?
        return false unless yield(node.first_index + index,
                                  node.values[index])
      end
    end
  end

  true
end
|
356
|
+
|
357
|
+
def check_leaf_node_links
  # Verify the backward link: a leaf without a predecessor must be the
  # tree's first leaf, otherwise the predecessor must point back here.
  backward_problem =
    if @prev_sibling.nil?
      if @tree.first_leaf != self
        "Leaf node #{@_id} has no previous sibling " +
          "but is not the first leaf of the tree"
      end
    elsif @prev_sibling.next_sibling != self
      "next_sibling of previous sibling does not point to " +
        "this node"
    end
  if backward_problem
    error backward_problem
    return false
  end

  # Verify the forward link in the same way against the tree's last leaf.
  forward_problem =
    if @next_sibling.nil?
      if @tree.last_leaf != self
        "Leaf node #{@_id} has no next sibling " +
          "but is not the last leaf of the tree"
      end
    elsif @next_sibling.prev_sibling != self
      "previous_sibling of next sibling does not point to " +
        "this node"
    end
  if forward_problem
    error forward_problem
    return false
  end

  true
end
|
384
|
+
|
385
|
+
def check_offsets
  # A root node without any offsets is accepted as is.
  return true if @parent.nil? && @offsets.empty?

  if @offsets[0] != 0
    error "First offset is not 0: #{@offsets.inspect}"
    return false
  end

  # Every later offset must not be smaller than its predecessor and must
  # equal the predecessor plus the number of values below the previous
  # child.
  1.upto(@offsets.size - 1) do |pos|
    previous = @offsets[pos - 1]
    current = @offsets[pos]

    if current < previous
      error "Offsets are not strictly monotoneously " +
            "increasing: #{@offsets.inspect}"
      return false
    end

    expected = previous + @children[pos - 1].values_count
    next if current == expected

    error "Offset #{pos} must be #{expected} " +
          "but is #{current}."
    return false
  end

  true
end
|
414
|
+
|
415
|
+
def check_child_nodes(stack)
  # Validate the child references of this branch node. The first problem
  # found is logged through #error and false is returned.
  # +stack+ is the list of nodes above this one. #traverse passes it as
  # [node, position] pairs; a plain list of nodes is accepted as well.
  if @children.uniq.size != @children.size
    error "Node #{@_id} has multiple identical children"
    return false
  end

  # Reduce the traversal stack to the bare nodes so the ancestor test
  # below compares nodes with nodes. ::Array is the core Array class.
  ancestors = stack.map do |entry|
    entry.is_a?(::Array) ? entry.first : entry
  end

  @children.each_with_index do |child, i|
    unless child.is_a?(BigArrayNode)
      error "Child #{i} is of class #{child.class} " +
            "instead of BigArrayNode"
      return false
    end

    unless child.parent.is_a?(BigArrayNode)
      error "Parent reference of child #{i} is of class " +
            "#{child.parent.class} instead of BigArrayNode"
      return false
    end

    if child.parent != self
      error "Child node #{child._id} has wrong parent " +
            "#{child.parent._id}. It should be #{@_id}."
      return false
    end

    if child == self
      error "Child #{i} point to self"
      return false
    end

    if ancestors.include?(child)
      error "Child #{i} points to ancester node"
      return false
    end
  end

  true
end
|
459
|
+
|
460
|
+
# @return [String] Human readable form of the sub-tree.
|
461
|
+
def to_s
  # Render the sub-tree as text: one header line per node and one line
  # per value of every leaf. Errors raised while formatting a line are
  # shown inline instead of aborting the dump.
  out = +''

  traverse do |node, position, _stack|
    begin
      if position == 0
        # Header line of the node.
        lead = node.parent ? node.parent.tree_prefix + ' +' : 'o'
        line = "#{lead}#{node.tree_branch_mark}-" \
               "#{node.size == 0 ? '--' : 'v-'}#{node.tree_summary}\n"
        out << line
      elsif node.is_leaf? && position <= node.size
        # Value line; the last value of a leaf gets a closing mark.
        prefix = node.tree_prefix
        mark = position == node.size ? '-' : '|'
        absolute = node.value_index(position - 1)
        value = node.values[position - 1]
        line = "#{prefix} #{mark} [ #{absolute}: " \
               "#{value.nil? ? 'nil' : value} ]\n"
        out << line
      end
    rescue => e
      out << "@@@@@@@@@@: #{e.message}\n"
    end
  end

  out
end
|
492
|
+
|
493
|
+
# Split the current node into two nodes. The upper half of the elements
|
494
|
+
# will be moved into a newly created node. This node will retain the lower
|
495
|
+
# half.
|
496
|
+
# @return [BigArrayNode] common parent of the two nodes
|
497
|
+
def split_node
  # Split this node in two. The lower half of the entries stays here, the
  # upper half moves into a newly created sibling that is registered with
  # the parent right behind this node. Returns the common parent.
  unless @parent
    # Splitting the root: create a new root above it first.
    self.parent = @store.new(BigArrayNode, @tree, false)
    @parent.offsets[0] = 0
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The new node that receives the upper half. For leaves the
  # constructor also links it between this node and the old next sibling.
  upper_half = @store.new(BigArrayNode, @tree, is_leaf?, @parent, myself,
                          @next_sibling)
  cut = size / 2
  moved = cut..-1

  if is_leaf?
    # The new leaf starts +cut+ values after this leaf, e.g. splitting
    # "A B C D" at parent offset 0 yields "A B" at 0 and "C D" at 2.
    @parent.insert_child_after_peer(cut, upper_half, self)
    # Copy the upper values over, then drop them here.
    upper_half.values += @values[moved]
    @values.slice!(moved)
  else
    # The first moved offset becomes the distance between the two
    # branches in the parent; the moved offsets are rebased to start at 0.
    base = @offsets[cut]
    @parent.insert_child_after_peer(base, upper_half, self)
    upper_half.offsets += @offsets[moved].map { |o| o - base }
    @offsets.slice!(moved)
    # Hand over the matching children and re-parent them.
    upper_half.children += @children[moved]
    upper_half.children.each { |kid| kid.parent = upper_half }
    @children.slice!(moved)
  end

  @parent
end
|
577
|
+
|
578
|
+
def insert_child_after_peer(offset, node, peer = nil)
  # Register +node+ as a child directly behind +peer+. Its offset is the
  # peer's offset plus +offset+.
  # NOTE(review): when +peer+ is not among the children (including the
  # nil default) the offset lookup below is done with a nil index and
  # raises -- confirm that callers always pass an existing child.
  anchor = @children.find_index(peer)
  slot = anchor.nil? ? 0 : anchor + 1
  @offsets.insert(slot, @offsets[anchor] + offset)
  @children.insert(slot, node)
end
|
584
|
+
|
585
|
+
def consolidate_child_nodes(child)
  # Rebalance an undersized +child+ with a neighbour: merge the two when
  # their entries fit into one node, otherwise move a single entry over
  # from the neighbour.
  unless (child_index = @children.index(child))
    error "Cannot find child to consolidate"
    # Without an index there is nothing to consolidate; continuing would
    # only fail on the nil index below.
    return
  end

  if child_index == 0
    # The first child has no predecessor, so use the successor if any.
    return unless (succ = @children[child_index + 1])

    if child.size + succ.size <= @tree.node_size
      merge_child_with_next(child_index)
    else
      move_first_element_of_successor_to_child(child_index)
    end
  else
    # Every other child is consolidated with its predecessor.
    pred = @children[child_index - 1]

    if pred.size + child.size <= @tree.node_size
      merge_child_with_next(child_index - 1)
    else
      move_last_element_of_predecessor_to_child(child_index)
    end
  end
end
|
612
|
+
|
613
|
+
# @param offset [Integer] offset to search the child index for
|
614
|
+
# @return [Integer] Index of the matching offset or the insert position.
|
615
|
+
def search_child_index(offset)
  # Nothing to search in an empty list, and anything at or below the
  # first offset belongs to the first child.
  return 0 if @offsets.empty? || offset <= @offsets.first

  # Otherwise pick the last child whose offset is not larger than the
  # requested one.
  first_larger = @offsets.bsearch_index { |o| o > offset }
  first_larger.nil? ? @offsets.length - 1 : first_larger - 1
end
|
621
|
+
|
622
|
+
# @return The index of the current node in the children list of the parent
|
623
|
+
# node. If the node is the root node, nil is returned.
|
624
|
+
def index_in_parent_node
  # The root node has no parent and therefore no position.
  @parent ? @parent.children.find_index(self) : nil
end
|
629
|
+
|
630
|
+
def first_index
  # Absolute BigArray index of the first value stored below this node.
  # It is the sum of the offsets that each ancestor stores for the child
  # on the path to this node.
  # TODO: This is an expensive method since it searches the children
  # list of every ancestor. Find a way to make this faster.
  total = 0
  child = myself
  node = parent

  while node
    # Add the offset the ancestor keeps for the sub-tree we came from.
    if (index = node.children.index(child))
      total += node.offsets[index]
    end
    child = node
    node = node.parent
  end

  total
end
|
645
|
+
|
646
|
+
# Compute the array index of the value with the given index in the current
|
647
|
+
# node.
|
648
|
+
# @param idx [Integer] Index of the value in the current node
|
649
|
+
# @return [Integer] Array index of the value
|
650
|
+
def value_index(idx)
  # Climb to the root, adding at every level the offset the parent keeps
  # for the node we came from.
  current = self
  while (upper = current.parent)
    idx += upper.offsets[current.index_in_parent_node]
    current = upper
  end

  idx
end
|
659
|
+
|
660
|
+
# This method takes care of adjusting the offsets in tree in case elements
|
661
|
+
# were inserted or removed. All nodes that hold children after the
|
662
|
+
# insert/remove operation need to be adjusted. Since child nodes get their
|
663
|
+
# offsets via their parents, only the parent node and the direct ancestor
|
664
|
+
# followers need to be adjusted.
|
665
|
+
# @param after_child [BigArrayNode] specifies the modified leaf node
|
666
|
+
# @param delta [Integer] specifies how many elements were inserted or
|
667
|
+
# removed.
|
668
|
+
def adjust_offsets(after_child, delta)
  # Starting at this node and climbing to the root, shift the offsets of
  # all children that follow the modified sub-tree by +delta+.
  node = self

  while node
    # Locate the child the change happened in.
    last = node.children.size - 1
    anchor = (0..last).find { |i| node.children[i] == after_child }

    if anchor
      # Only the entries behind that child move.
      (anchor + 1).upto(last) { |i| node.offsets[i] += delta }
    else
      node.fatal "Could not find child #{after_child._id}"
    end

    # One level up, the node just processed is the modified child.
    after_child = node
    node = node.parent
  end
end
|
692
|
+
|
693
|
+
# This is a generic tree iterator. It yields before it descends into the
|
694
|
+
# child node and after (which is identical to before the next child
|
695
|
+
# descend). It yields the node, the position and the stack of parent
|
696
|
+
# nodes.
|
697
|
+
# @yield [node, position, stack]
|
698
|
+
def traverse
  # Iterative depth-first walk. +pending+ holds the [node, position]
  # pairs that still have to be visited.
  pending = [[self, 0]]

  until pending.empty?
    node, position = pending.pop

    # Position 0 means the node was just entered; position k means the
    # k-th entry has been passed. The caller sees the live list of
    # pending pairs as third argument.
    yield(node, position, pending)

    # Past the last position there is nothing left to do for this node.
    next if position > node.size

    # Come back to this node at the next position later.
    pending.push([node, position + 1])
    next if node.is_leaf?

    # Visit the child at this position first, if there is one.
    child = node.children[position]
    pending.push([child, 0]) if child
  end
end
|
727
|
+
|
728
|
+
# Gather some statistics about the node and all sub nodes.
|
729
|
+
# @param stats [Stats] Data structure that stores the gathered data
|
730
|
+
def statistics(stats)
  # Count the leaf and branch nodes of this sub-tree and record the
  # smallest and largest leaf depth in +stats+. The object must provide
  # leaf_nodes, branch_nodes, min_depth and max_depth accessors.
  traverse do |node, position, stack|
    # Count every node once, when it is entered.
    next unless position == 0

    unless node.is_leaf?
      stats.branch_nodes += 1
      next
    end

    stats.leaf_nodes += 1
    # The traversal stack holds one entry per ancestor.
    depth = stack.size + 1
    # Lower the minimum for shallower leaves, raise the maximum for
    # deeper ones.
    if stats.min_depth.nil? || depth < stats.min_depth
      stats.min_depth = depth
    end
    if stats.max_depth.nil? || depth > stats.max_depth
      stats.max_depth = depth
    end
  end
end
|
748
|
+
|
749
|
+
# Return the decoration that marks the tree structure of this node for the
|
750
|
+
# inspection method.
|
751
|
+
def tree_prefix
  # Build the indentation from the inside out: one segment per ancestor
  # level, with a vertical bar where more siblings follow. The root level
  # contributes nothing.
  prefix = ''
  node = self

  while node && node.parent
    closes_level = node.parent.children.last == node
    prefix = (closes_level ? ' ' : ' |') + prefix
    node = node.parent
  end

  prefix
end
|
770
|
+
|
771
|
+
# Branch node decoration for the inspection method.
|
772
|
+
def tree_branch_mark
  # Only nodes below the root get a connector dash.
  @parent ? '-' : ''
end
|
776
|
+
|
777
|
+
# Text for the node line for the inspection method.
|
778
|
+
def tree_summary
  # One-line description: own id, then offset in and id of the parent,
  # then the ids of the previous and next sibling. A part that cannot be
  # resolved is replaced by its marker followed by '@'.
  summary = " @#{@_id}"

  if @parent
    summary += begin
      " +#{@parent.offsets[index_in_parent_node]} ^#{@parent._id}"
    rescue
      ' ^@'
    end
  end

  if @prev_sibling
    summary += begin
      " <#{@prev_sibling._id}"
    rescue
      ' <@'
    end
  end

  if @next_sibling
    summary += begin
      " >#{@next_sibling._id}"
    rescue
      ' >@'
    end
  end

  summary
end
|
804
|
+
|
805
|
+
# Print and log an error message for the node.
|
806
|
+
def error(msg)
  # Prefix the message with the node id, append a dump of the whole tree
  # and send the report to stderr and to the PEROBS log.
  report = "Error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.error report
end
|
811
|
+
|
812
|
+
# Print and log an error message for the node.
|
813
|
+
def fatal(msg)
  # The message is prefixed with this node's ID and followed by the string
  # form of the whole tree, then written to $stderr and passed to
  # PEROBS.log.fatal.
  # @param msg [String] description of the problem
  report = "Fatal error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts(report)
  PEROBS.log.fatal(report)
end
|
818
|
+
|
819
|
+
private
|
820
|
+
|
821
|
+
# Fill level below which merge_child_with_next asks the parent to
# consolidate this node: half of the tree's configured node size.
# @return [Integer] @tree.node_size divided by 2
def min_size
  capacity = @tree.node_size
  capacity / 2
end
|
824
|
+
|
825
|
+
# Move first element of successor to end of child node
|
826
|
+
# @param child_index [Integer] index of the child
|
827
|
+
def move_first_element_of_successor_to_child(child_index)
  # Takes the first entry of the child at child_index + 1 (the successor)
  # and appends it to the child at child_index, keeping the offsets of this
  # node and of both children consistent. For leaf children the entry is a
  # single value, for branch children it is a whole subtree.
  child = @children[child_index]
  succ = @children[child_index + 1]

  if child.is_leaf?
    # The successor loses its first value, so it starts one position later
    # within this node.
    @offsets[child_index + 1] += 1
    # Move the value
    child.values << succ.values.shift
  else
    # Example with leaves ABCD EFG | HIJK LM NOP QR and child_index 0:
    #
    #            this node   child        succ
    #   before   [0, 7]      [0, 4]       [0, 4, 6, 9]
    #   after    [0, 11]     [0, 4, 7]    [0, 2, 5]
    #
    # Drop the successor's leading 0 offset. The offset that is now first
    # equals the number of values in the subtree being moved; it becomes the
    # new zero point of the successor and the delta for this node.
    succ.offsets.shift
    delta = succ.offsets.first
    succ.offsets.map! { |o| o - delta }
    # The moved subtree starts right after the child's current content.
    # Offsets stored in a node are relative to the start of that node, so
    # the new entry is the child's length (distance between the two entries
    # of this node), not the successor's position within this node. Both
    # are the same only for child_index == 0.
    child.offsets << (@offsets[child_index + 1] - @offsets[child_index])
    # The successor now starts delta values later within this node.
    @offsets[child_index + 1] += delta
    # Move the child reference and re-parent it.
    moved = succ.children.shift
    child.children << moved
    moved.parent = child
  end
end
|
883
|
+
|
884
|
+
# Move last element of predecessor node to child
|
885
|
+
# @param child_index [Integer] index of the child
|
886
|
+
def move_last_element_of_predecessor_to_child(child_index)
  # Takes the last entry of the child at child_index - 1 (the donor) and
  # puts it in front of the child at child_index (the receiver). For leaf
  # children the entry is a single value, for branch children a subtree.
  donor = @children[child_index - 1]
  receiver = @children[child_index]

  if receiver.is_leaf?
    # The receiver gains one value at its front and therefore starts one
    # position earlier within this node.
    @offsets[child_index] -= 1
    receiver.values.unshift(donor.values.pop)
  else
    # Example with leaves ABCD EFG HIJK LM | NOP QR:
    #
    #            this node   donor            receiver
    #   before   [0, 13]     [0, 4, 7, 11]    [0, 3]
    #   after    [0, 11]     [0, 4, 7]        [0, 2, 5]
    #
    # Number of values in the subtree that changes sides. It has to be read
    # before the subtree is detached from the donor.
    shift_by = donor.children.last.values_count
    # The receiver now starts that many values earlier within this node.
    @offsets[child_index] -= shift_by
    # The donor no longer needs the offset of the subtree it gives away.
    donor.offsets.pop
    # Everything already in the receiver moves back behind the new subtree,
    # which itself takes offset 0.
    receiver.offsets.map! { |offset| offset + shift_by }
    receiver.offsets.unshift(0)
    # Move the child reference and re-parent it.
    receiver.children.unshift(donor.children.pop)
    receiver.children.first.parent = receiver
  end
end
|
940
|
+
|
941
|
+
# Merge the child at child_index with the child right after it. The content
# of the second child is appended to the first one and the second child is
# removed from this node.
# @param child_index [Integer] index of the first of the two children
def merge_child_with_next(child_index)
  succ_index = child_index + 1
  keeper = @children[child_index]
  absorbed = @children[succ_index]

  if keeper.is_leaf?
    # Unlink the absorbed leaf from the sibling chain.
    following = absorbed.next_sibling
    keeper.next_sibling = following
    following.prev_sibling = keeper if following

    keeper.values += absorbed.values
    # Without a following sibling the keeper has become the last leaf of
    # the tree.
    @tree.last_leaf = keeper unless following
  else
    # Example with leaves ABCD EFG HIJK | LM:
    #
    #            this node   keeper           absorbed
    #   before   [0, 11]     [0, 4, 7]        [0]
    #   after    [0]         [0, 4, 7, 11]    (removed)
    #
    # The absorbed offsets are relative to the absorbed node; shifting them
    # by the keeper's length makes them relative to the keeper.
    delta = @offsets[succ_index] - @offsets[child_index]
    keeper.offsets += absorbed.offsets.map { |o| o + delta }
    absorbed.children.each { |grandchild| grandchild.parent = keeper }
    keeper.children += absorbed.children
  end

  # Remove the absorbed child from this node.
  @offsets.delete_at(succ_index)
  @children.delete_at(succ_index)

  # This node lost a child. Unless it has no parent, let the parent
  # rebalance it once it has fewer than min_size entries.
  @parent.consolidate_child_nodes(self) if @parent && size < min_size
end
|
998
|
+
|
999
|
+
end
|
1000
|
+
|
1001
|
+
end
|
1002
|
+
|