perobs 4.0.0 → 4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,1002 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigArrayNode.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017, 2018, 2019
|
6
|
+
# by Chris Schlaeger <chris@taskjuggler.org>
|
7
|
+
#
|
8
|
+
# MIT License
|
9
|
+
#
|
10
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
11
|
+
# a copy of this software and associated documentation files (the
|
12
|
+
# "Software"), to deal in the Software without restriction, including
|
13
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
14
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
15
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
16
|
+
# the following conditions:
|
17
|
+
#
|
18
|
+
# The above copyright notice and this permission notice shall be
|
19
|
+
# included in all copies or substantial portions of the Software.
|
20
|
+
#
|
21
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
22
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
23
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
24
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
25
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
26
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
27
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
28
|
+
|
29
|
+
require 'perobs/Object'
|
30
|
+
require 'perobs/Array'
|
31
|
+
|
32
|
+
module PEROBS
|
33
|
+
|
34
|
+
# The BigArrayNode class provides the BTree nodes for the BigArray objects.
|
35
|
+
# A node can either be a branch node or a leaf node. Branch nodes don't
|
36
|
+
# store values, only offsets and references to child nodes. Leaf nodes don't
|
37
|
+
# have child nodes but store the actual values. The leaf nodes always
|
38
|
+
# contain at least node_size / 2 number of consecutive values. The index of
|
39
|
+
# the first value in the BigArray is the sum of the offsets stored in the
|
40
|
+
# parent nodes. Branch nodes store the offsets and the corresponding
|
41
|
+
# child node references. The first offset is always 0. Consecutive offsets
|
42
|
+
# are set to the previous offset plus the total number of values stored in
|
43
|
+
# the previous child node. The leaf nodes don't contain holes. A
|
44
|
+
# concatenation of all leaf node values represents the stored Array.
|
45
|
+
#
|
46
|
+
# Root Node +--------------------------------+
|
47
|
+
# Offsets | 0 11 |
|
48
|
+
# Children | |
|
49
|
+
# v v
|
50
|
+
# Level 1 +--------------------------++--------------------------+
|
51
|
+
# Offsets | 0 4 7 || 0 2 5 |
|
52
|
+
# Children | | | | | |
|
53
|
+
# v v v v v v
|
54
|
+
# Leaves +---------++-------++----------++-------++----------++-------+
|
55
|
+
# Values | A B C D || E F G || H I J K || L M || N O P || Q R |
|
56
|
+
#
|
57
|
+
# Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
|
58
|
+
#
|
59
|
+
class BigArrayNode < PEROBS::Object
|
60
|
+
|
61
|
+
attr_persist :tree, :parent, :offsets, :values, :children,
|
62
|
+
:prev_sibling, :next_sibling
|
63
|
+
|
64
|
+
# Internal constructor. Use Store.new(BigArrayNode, ...) instead.
# @param p [Handle] handle that is passed on to the PEROBS::Object constructor
# @param tree [BigArray] The tree this node should belong to
# @param is_leaf [Boolean] True if a leaf node should be created, false
#        for a branch node.
# @param parent [BigArrayNode] Parent node (nil by default)
# @param prev_sibling [BigArrayNode] Previous sibling (only used for leaves)
# @param next_sibling [BigArrayNode] Next sibling (only used for leaves)
def initialize(p, tree, is_leaf, parent = nil,
               prev_sibling = nil, next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent

  unless is_leaf
    # Branch node: it keeps offsets and child references but no values,
    # and it does not take part in the sibling chain.
    self.offsets = @store.new(PEROBS::Array)
    self.children = @store.new(PEROBS::Array)
    self.values = nil
    self.prev_sibling = self.next_sibling = nil
    return
  end

  # Leaf node: it keeps values and has neither children nor offsets.
  self.values = @store.new(PEROBS::Array)
  self.children = self.offsets = nil

  # Hook the new leaf into the sibling chain. A missing neighbor on either
  # side means this leaf becomes the first respectively last leaf of the
  # tree and must be registered as such with the BigArray object.
  if (self.prev_sibling = prev_sibling)
    @prev_sibling.next_sibling = myself
  else
    @tree.first_leaf = myself
  end

  if (self.next_sibling = next_sibling)
    @next_sibling.prev_sibling = myself
  else
    @tree.last_leaf = myself
  end
end
|
106
|
+
|
107
|
+
# A node is treated as a leaf when it has no children list; the constructor
# sets @children to nil for leaf nodes.
# @return [Boolean] True if this is a leaf node, false otherwise.
def is_leaf?
  @children.nil?
end
|
111
|
+
|
112
|
+
# @return [Integer] Number of entries directly held by this node: the number
#         of values for a leaf node, the number of children for a branch
#         node.
def size
  if is_leaf?
    @values.size
  else
    @children.size
  end
end
|
115
|
+
|
116
|
+
# Count the values below this node by walking down the right-most path: the
# last offset of every branch node on the way is added up and finally the
# number of values in the right-most leaf is added.
# @return [Integer] the number of values stored in this node and its
#         sub-tree.
def values_count
  total = 0
  current = self

  while current
    return total + current.values.size if current.is_leaf?

    total += current.offsets.last
    current = current.children.last
  end
end
|
129
|
+
|
130
|
+
|
131
|
+
# Replace the value at the given index. The index is relative to the first
# value of the sub-tree that starts at this node. The index must refer to an
# already existing value; otherwise a fatal error is reported.
# @param index [Integer] Position of the value to replace
# @param value value to store at that position
def set(index, value)
  node = self

  # Walk down the tree until we reach the leaf that holds the index.
  while node
    if node.is_leaf?
      if index >= node.values.size
        node.fatal "Set index (#{index}) larger than values array " +
                   "(#{node.values.size})."
      end
      node.values[index] = value
      return
    end

    # Descend into the child that covers the index and make the index
    # relative to that child.
    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # We only get here when a child reference was nil, so 'node' cannot be
  # used as receiver any more. Report the problem via this node instead.
  fatal "Could not find proper node to set the value while " +
        "looking for index #{index}"
end
|
160
|
+
|
161
|
+
# Insert the given value at the given index. All following values will be
# pushed to a higher index. The index is relative to the first value of the
# sub-tree that starts at this node.
# @param index [Integer] Position to insert at
# @param value value to insert
def insert(index, value)
  node = self
  cidx = nil

  # Walk down the tree to find the leaf that will receive the value.
  while node
    # All nodes that we find on the way that are full will be split into
    # two half-full nodes.
    if node.size >= @tree.node_size
      # split_node returns the parent of the two halves. Re-add the offset
      # that was subtracted when we descended from that parent so that the
      # index is relative to the parent again.
      index += node.parent.offsets[cidx] if node.parent
      node = node.split_node
    end

    if node.is_leaf?
      # Once we have reached a leaf node we can insert the value.
      node.values.insert(index, value)
      node.parent.adjust_offsets(node, 1) if node.parent
      return
    else
      # Descend into the child that covers the index and make the index
      # relative to that child.
      cidx = node.search_child_index(index)
      if (index -= node.offsets[cidx]) < 0
        node.fatal "Index (#{index}) became negative"
      end
      node = node.children[cidx]
    end
  end

  # We only get here when a child reference was nil, so 'node' cannot be
  # used as receiver any more. Report the problem via this node instead.
  fatal "Could not find proper node to insert the value while " +
        "looking for index #{index}"
end
|
198
|
+
|
199
|
+
# Return the value stored at the given index. The index is relative to the
# first value of the sub-tree that starts at this node.
# @param index [Integer] Position of the value to look up
# @return value found at that position (nil if the leaf has no such entry)
def get(index)
  node = self

  # Walk down the tree until we reach the leaf that holds the index.
  while node
    return node.values[index] if node.is_leaf?

    # Pick the last child whose offset is not larger than the index and
    # make the index relative to that child.
    first_larger = node.offsets.bsearch_index { |o| o > index }
    cidx = (first_larger || node.offsets.length) - 1
    index -= node.offsets[cidx]
    node.fatal "Index (#{index}) became negative" if index < 0
    node = node.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for index #{index}"
end
|
225
|
+
|
226
|
+
# Delete the element at the specified index, returning that element, or
# nil if the leaf has no value at that index. The index is relative to the
# first value of the sub-tree that starts at this node.
# @param index [Integer] Index in the BigArray
# @return [Object] found value or nil
def delete_at(index)
  node = self

  while node
    if node.is_leaf?
      removed = node.values.delete_at(index)
      if node.parent
        # Following offsets in all ancestors shrink by one.
        node.parent.adjust_offsets(node, -1)
        # An underfull leaf is merged with or refilled from a neighbor.
        node.parent.consolidate_child_nodes(node) if node.size < min_size
      end

      return removed
    end

    # Pick the last child whose offset is not larger than the index and
    # make the index relative to that child.
    first_larger = node.offsets.bsearch_index { |o| o > index }
    cidx = (first_larger || node.offsets.length) - 1
    index -= node.offsets[cidx]
    node.fatal "Index (#{index}) became negative" if index < 0
    node = node.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to delete from while " +
                   "looking for index #{index}"
end
|
259
|
+
|
260
|
+
# Iterate over all the values of this node. Only leaf nodes hold values;
# for a branch node nothing is yielded and nil is returned.
# @yield [value]
def each
  return nil unless is_leaf?

  @values.each { |value| yield(value) }
end
|
269
|
+
|
270
|
+
# Iterate over all the values of this node in reverse order. Only leaf nodes
# hold values; for a branch node nothing is yielded and nil is returned.
# @yield [value]
def reverse_each
  return nil unless is_leaf?

  @values.reverse_each { |value| yield(value) }
end
|
279
|
+
|
280
|
+
# Check consistency of the node and all subsequent nodes. In case an error
# is found, a message is logged and false is returned.
# If a block is given it is called for every value found in a leaf node. The
# check is aborted with false as soon as the block returns a false value.
# @yield [index, value]
# @return [Boolean] true if tree has no errors
def check
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # Nodes should have between min_size() and
      # @tree.node_size children or values. Only the root node may have
      # less.
      if node.size > @tree.node_size
        node.error "BigArray node #{node._id} is too large. It has " +
                   "#{node.size} nodes instead of max. #{@tree.node_size}."
        return false
      end
      if node.parent && node.size < min_size
        node.error "BigArray node #{node._id} is too small"
        return false
      end

      if node.is_leaf?
        # All leaf nodes must have same distance from root node.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end

        return false unless node.check_leaf_node_links

        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        unless node.children.size == node.offsets.size
          node.error "Offset count (#{node.offsets.size}) must be equal " +
                     "to children count (#{node.children.size})"
          return false
        end

        if node.values
          node.error "values must be nil for a branch node"
          return false
        end

        # The sibling links of the node that is currently being visited
        # must be inspected, not the links of the node the check was
        # started from.
        unless node.prev_sibling.nil? && node.next_sibling.nil?
          node.error "prev_sibling and next_sibling must be nil for " +
                     "branch nodes"
          return false
        end

        return false unless node.check_offsets

        return false unless node.check_child_nodes(stack)
      end
    elsif position <= node.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf? && block_given?
        # If a block was given, call this block with the index and value.
        return false unless yield(node.first_index + index,
                                  node.values[index])
      end
    end
  end

  true
end
|
356
|
+
|
357
|
+
# Verify the sibling links of a leaf node. A leaf without a previous
# (next) sibling must be registered as first (last) leaf of the tree. If a
# sibling exists, its link in the opposite direction must point back to
# this node. Problems are reported via error().
# @return [Boolean] true if the links are consistent, false otherwise
def check_leaf_node_links
  if @prev_sibling.nil?
    if @tree.first_leaf != self
      problem = "Leaf node #{@_id} has no previous sibling " +
                "but is not the first leaf of the tree"
      error problem
      return false
    end
  elsif @prev_sibling.next_sibling != self
    problem = "next_sibling of previous sibling does not point to " +
              "this node"
    error problem
    return false
  end

  if @next_sibling.nil?
    if @tree.last_leaf != self
      problem = "Leaf node #{@_id} has no next sibling " +
                "but is not the last leaf of the tree"
      error problem
      return false
    end
  elsif @next_sibling.prev_sibling != self
    problem = "previous_sibling of next sibling does not point to " +
              "this node"
    error problem
    return false
  end

  true
end
|
384
|
+
|
385
|
+
# Verify the offsets list of a branch node: the first offset must be 0 and
# every following offset must be the previous offset plus the number of
# values stored below the previous child. A root node without any offsets
# is accepted. Problems are reported via error().
# @return [Boolean] true if the offsets are consistent, false otherwise
def check_offsets
  return true if @parent.nil? && @offsets.empty?

  if @offsets[0] != 0
    error "First offset is not 0: #{@offsets.inspect}"
    return false
  end

  1.upto(@offsets.size - 1) do |i|
    previous = @offsets[i - 1]
    current = @offsets[i]

    if current < previous
      error "Offsets are not strictly monotoneously " +
            "increasing: #{@offsets.inspect}"
      return false
    end

    expected_offset = previous + @children[i - 1].values_count
    if current != expected_offset
      error "Offset #{i} must be #{expected_offset} " +
            "but is #{current}."
      return false
    end
  end

  true
end
|
414
|
+
|
415
|
+
# Verify the children list of a branch node: no child may be listed twice,
# every child must be a BigArrayNode whose parent reference points back to
# this node, and no child may be this node itself or one of the nodes on
# the given stack. Problems are reported via error().
# @param stack [Array] stack of the tree traversal that is in progress; it
#        is searched for the child nodes to detect loops
# @return [Boolean] true if the children are consistent, false otherwise
def check_child_nodes(stack)
  if @children.uniq.size != @children.size
    error "Node #{@_id} has multiple identical children"
    return false
  end

  @children.each_with_index do |child, i|
    unless child.is_a?(BigArrayNode)
      error "Child #{i} is of class #{child.class} " +
            "instead of BigArrayNode"
      return false
    end

    unless child.parent.is_a?(BigArrayNode)
      # Report the class of the parent reference, not the class of the
      # child which is already known to be a BigArrayNode.
      error "Parent reference of child #{i} is of class " +
            "#{child.parent.class} instead of BigArrayNode"
      return false
    end

    if child.parent != self
      error "Child node #{child._id} has wrong parent " +
            "#{child.parent._id}. It should be #{@_id}."
      return false
    end

    if child == self
      error "Child #{i} point to self"
      return false
    end

    if stack.include?(child)
      error "Child #{i} points to ancester node"
      return false
    end
  end

  true
end
|
459
|
+
|
460
|
+
# Render the sub-tree that starts at this node as text. Every node
# contributes one header line; every value of a leaf node contributes one
# additional line. Exceptions that occur while a line is generated are
# caught and replaced by a marker line with the exception message.
# @return [String] Human readable form of the sub-tree.
def to_s
  str = ''

  traverse do |node, position, stack|
    if position == 0
      # First visit of the node: generate the node header line.
      begin
        str += "#{node.parent ? node.parent.tree_prefix + ' +' : 'o'}" +
          "#{node.tree_branch_mark}-" +
          "#{node.size == 0 ? '--' : 'v-'}#{node.tree_summary}\n"
      rescue => e
        str += "@@@@@@@@@@: #{e.message}\n"
      end
    else
      # Later visits: for leaf nodes list the value with index
      # 'position - 1'. The last value line is marked with '-', all others
      # with '|'.
      begin
        if node.is_leaf?
          if position <= node.size
            str += "#{node.tree_prefix} " +
              "#{position == node.size ? '-' : '|'} " +
              "[ #{node.value_index(position - 1)}: " +
              "#{node.values[position - 1].nil? ?
              'nil' : node.values[position - 1]} ]\n"
          end
        end
      rescue => e
        str += "@@@@@@@@@@: #{e.message}\n"
      end
    end
  end

  str
end
|
492
|
+
|
493
|
+
# Split the current node into two nodes. The upper half of the elements
# will be moved into a newly created node. This node will retain the lower
# half. If this node is the root node, a new root node is created first and
# registered with the tree.
# @return [BigArrayNode] common parent of the two nodes
def split_node
  unless @parent
    # The node is the root node. We need to create a parent node first.
    self.parent = @store.new(BigArrayNode, @tree, false)
    @parent.offsets[0] = 0
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # Create the new sibling that will take the 2nd half of the
  # node content. The constructor only uses the sibling arguments for leaf
  # nodes.
  sibling = @store.new(BigArrayNode, @tree, is_leaf?, @parent, myself,
                       @next_sibling)
  # Determine the index of the first element that gets moved to the new
  # sibling.
  mid = size / 2
  if is_leaf?
    # Before:
    #            +--------------------------+
    #            | 0         4       7      |
    #              |         |       |
    #              v         v       v
    #   +---------++-------++----------+
    #   | A B C D || E F G || H I J K  |
    #
    # After:
    #            +--------------------------+
    #            | 0     2     4       7    |
    #              |     |     |       |
    #              v     v     v       v
    #   +-----++-----++-------++----------+
    #   | A B || C D || E F G || H I J K  |
    #
    #
    # Insert the new sibling into the parent node. Its offset is the offset
    # of this node plus the number of values that stay in this node.
    @parent.insert_child_after_peer(mid, sibling, self)
    # Copy the values from the mid element onwards into the new
    # sibling node.
    sibling.values += @values[mid..-1]
    # Delete the copied values from this node.
    @values.slice!(mid..-1)
  else
    # Before:
    #            +--------------+
    #            | 0         11 |
    #              |          |
    #              v          v
    #   +----------++-------+
    #   | 0 4 7 10 || 0 2 5 |
    #     | | |  |    | | |
    #     v v v  v    v v v
    #
    # After:
    #            +------------------+
    #            | 0     7       11 |
    #              |     |        |
    #              v     v        v
    #   +-----++-----++-------+
    #   | 0 4 || 0 3 || 0 2 5 |
    #     | |    | |    | | |
    #     v v    v v    v v v
    #
    # Insert the new sibling into the parent node.
    offset_delta = @offsets[mid]
    @parent.insert_child_after_peer(offset_delta, sibling, self)
    # Copy the offsets from the mid value onwards to the new sibling
    # node. We subtract the offset delta from each of them.
    sibling.offsets += @offsets[mid..-1].map{ |v| v - offset_delta }
    # Delete the copied offsets from this node.
    @offsets.slice!(mid..-1)
    # Same copy for the children.
    sibling.children += @children[mid..-1]
    # Reparent the children to the new sibling parent.
    sibling.children.each { |c| c.parent = sibling }
    # And delete the copied children references.
    @children.slice!(mid..-1)
  end

  @parent
end
|
577
|
+
|
578
|
+
# Insert a child node into this branch node directly behind the given peer
# node. The offset of the new child is the offset of the peer plus the given
# offset. Without a peer the child is inserted at the front and the given
# offset is used unchanged. The offsets of the other children are not
# modified by this method.
# @param offset [Integer] offset of the new child relative to the offset of
#        the peer
# @param node [BigArrayNode] child node to insert
# @param peer [BigArrayNode] existing child that will precede the new child
def insert_child_after_peer(offset, node, peer = nil)
  if peer.nil?
    # No peer: there is no offset to look up, insert at the front.
    cidx = 0
    base_offset = 0
  else
    unless (peer_index = @children.find_index(peer))
      fatal "Cannot find peer node to insert the child after"
      return
    end
    cidx = peer_index + 1
    base_offset = @offsets[peer_index]
  end

  @offsets.insert(cidx, base_offset + offset)
  @children.insert(cidx, node)
end
|
584
|
+
|
585
|
+
# Consolidate the given child node with one of its direct neighbors in this
# branch node. The first child is consolidated with its successor, all other
# children with their predecessor. If both nodes together fit into one node
# they are merged, otherwise one element is moved from the neighbor into the
# child.
# @param child [BigArrayNode] child node that should be consolidated
def consolidate_child_nodes(child)
  unless (child_index = @children.index(child))
    error "Cannot find child to consolidate"
    # Without a valid index there is nothing we can consolidate.
    return
  end

  if child_index == 0
    # Consolidate with successor if it exists.
    return unless (succ = @children[child_index + 1])

    if child.size + succ.size <= @tree.node_size
      # merge child with successor
      merge_child_with_next(child_index)
    else
      move_first_element_of_successor_to_child(child_index)
    end
  else
    # consolidate with predecessor
    pred = @children[child_index - 1]

    if pred.size + child.size <= @tree.node_size
      # merge child with predecessor
      merge_child_with_next(child_index - 1)
    else
      move_last_element_of_predecessor_to_child(child_index)
    end
  end
end
|
612
|
+
|
613
|
+
# Find the child that covers the given offset. This is the last child whose
# offset is not larger than the given offset.
# @param offset [Integer] offset to search the child index for
# @return [Integer] Index of the matching offset or the insert position.
def search_child_index(offset)
  # Without offsets or with an offset at or below the first one the first
  # slot is the only candidate.
  return 0 if @offsets.empty?
  return 0 if offset <= @offsets.first

  first_larger = @offsets.bsearch_index { |o| o > offset }
  (first_larger || @offsets.length) - 1
end
|
621
|
+
|
622
|
+
# @return [Integer, nil] The index of the current node in the children list
#         of the parent node. If the node is the root node, nil is returned.
def index_in_parent_node
  @parent ? @parent.children.find_index(self) : nil
end
|
629
|
+
|
630
|
+
# Compute the BigArray index of the first value that is stored in the
# sub-tree of this node. This is the sum of the offsets that the ancestor
# nodes have stored for the path from the root node down to this node.
# @return [Integer] BigArray index of the first value of this node
def first_index
  # TODO: This is an expensive method since the children list of every
  # ancestor is searched. Find a way to make this faster.
  index = 0
  child = myself
  node = parent

  while node
    if (cidx = node.children.index(child))
      index += node.offsets[cidx]
    end
    child = node
    node = node.parent
  end

  index
end
|
645
|
+
|
646
|
+
# Compute the array index of the value with the given index in the current
# node. The offsets that the ancestors have stored for the path to this node
# are added to the given index.
# @param idx [Integer] Index of the value in the current node
# @return [Integer] Array index of the value
def value_index(idx)
  current = self

  while (upper = current.parent)
    idx += upper.offsets[current.index_in_parent_node]
    current = upper
  end

  idx
end
|
659
|
+
|
660
|
+
# This method takes care of adjusting the offsets in the tree in case
# elements were inserted or removed. Starting with this node and continuing
# with each ancestor, the offsets of all children that follow the modified
# child are changed by delta. On each higher level the node that was just
# processed takes the role of the modified child.
# @param after_child [BigArrayNode] specifies the modified leaf node
# @param delta [Integer] specifies how many elements were inserted or
#        removed.
def adjust_offsets(after_child, delta)
  node = self

  while node
    found = false
    node.children.size.times do |i|
      if found
        # We are behind the modified child; shift the offset.
        node.offsets[i] += delta
      elsif node.children[i] == after_child
        found = true
      end
    end

    node.fatal "Could not find child #{after_child._id}" unless found

    after_child = node
    node = node.parent
  end
end
|
692
|
+
|
693
|
+
# This is a generic tree iterator. It yields before it descends into a
# child node and after it has returned from it (which is identical to before
# the next child descend). It yields the node, the position and the stack of
# still pending [node, position] entries.
#
# The position marks where we are in the node with respect to the traversal.
# 0 means we've just entered the node for the first time and are about to
# descend to the first child. Position 1 is after the 1st child has been
# processed and before the 2nd child is being processed. The iteration over
# a node ends after the first position that is larger than the node size has
# been yielded.
# @yield [node, position, stack]
def traverse
  # We use a non-recursive implementation to traverse the tree. This list
  # keeps track of all the known still to be visited positions.
  pending = [ [ self, 0 ] ]

  until pending.empty?
    node, position = pending.pop

    yield(node, position, pending)

    next if position > node.size

    # Come back to this node for the next position later on.
    pending.push([ node, position + 1 ])

    next if node.is_leaf?

    # If we have a child node for this position, visit it first.
    pending.push([ node.children[position], 0 ]) if node.children[position]
  end
end
|
727
|
+
|
728
|
+
# Gather some statistics about the node and all sub nodes. The leaf and
# branch node counters are incremented for every node found and the minimum
# and maximum depth of the leaf nodes are tracked.
# @param stats [Stats] Data structure that stores the gathered data. It
#        must provide leaf_nodes, branch_nodes, min_depth and max_depth.
def statistics(stats)
  traverse do |node, position, stack|
    # Every node is counted only once, when it is entered.
    next unless position == 0

    if node.is_leaf?
      stats.leaf_nodes += 1
      depth = stack.size + 1
      # Track the smallest depth in min_depth and the largest depth in
      # max_depth.
      if stats.min_depth.nil? || depth < stats.min_depth
        stats.min_depth = depth
      end
      if stats.max_depth.nil? || depth > stats.max_depth
        stats.max_depth = depth
      end
    else
      stats.branch_nodes += 1
    end
  end
end
|
748
|
+
|
749
|
+
# Return the decoration that marks the tree structure of this node for the
# inspection method. One segment is generated for this node and for each
# ancestor below the root node. The segment shows a '|' unless the
# respective node is the last child of its parent.
# @return [String] line prefix
def tree_prefix
  prefix = ''
  node = self

  # Don't add lines for the top-level.
  while node.parent
    is_last_child = node.parent.children.last == node
    prefix = (is_last_child ? ' ' : ' |') + prefix
    node = node.parent
  end

  prefix
end
|
770
|
+
|
771
|
+
# Branch node decoration for the inspection method.
# @return [String] '-' for nodes that have a parent, '' for the root node
def tree_branch_mark
  @parent ? '-' : ''
end
|
776
|
+
|
777
|
+
# Text for the node line for the inspection method. It lists the ID of the
# node and, if present, the offset stored in the parent together with the
# parent ID ('+' and '^'), the previous sibling ID ('<') and the next
# sibling ID ('>'). A section that cannot be generated because of an
# exception is replaced by its marker character followed by '@'.
# @return [String] summary of the node
def tree_summary
  summary = " @#{@_id}"

  if @parent
    begin
      summary << " +#{@parent.offsets[index_in_parent_node]} ^#{@parent._id}"
    rescue
      summary << ' ^@'
    end
  end

  if @prev_sibling
    begin
      summary << " <#{@prev_sibling._id}"
    rescue
      summary << ' <@'
    end
  end

  if @next_sibling
    begin
      summary << " >#{@next_sibling._id}"
    rescue
      summary << ' >@'
    end
  end

  summary
end
|
804
|
+
|
805
|
+
# Print and log an error message for the node. The message is extended by
# the node ID and the text form of the whole tree, written to $stderr and
# passed to the PEROBS logger with error severity.
# @param msg [String] description of the problem
def error(msg)
  report = "Error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.error report
end
|
811
|
+
|
812
|
+
# Print and log a fatal error message for the node. The message is extended
# by the node ID and the text form of the whole tree, written to $stderr
# and passed to the PEROBS logger with fatal severity.
# @param msg [String] description of the problem
def fatal(msg)
  report = "Fatal error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.fatal report
end
|
818
|
+
|
819
|
+
private
|
820
|
+
|
821
|
+
# @return Half of the node size that is configured for the tree. check() and
#         delete_at() compare the size of nodes that have a parent against
#         this limit.
def min_size
  @tree.node_size / 2
end
|
824
|
+
|
825
|
+
# Move first element of successor to end of child node. For leaf nodes the
# element is a single value, for branch nodes it is the first child subtree
# of the successor. The offsets of this node, the child and the successor
# are adjusted accordingly.
# @param child_index [Integer] index of the child
def move_first_element_of_successor_to_child(child_index)
  child = @children[child_index]
  succ = @children[child_index + 1]

  if child.is_leaf?
    # The child grows by one value, so the successor starts one position
    # later within this node.
    @offsets[child_index + 1] += 1
    # Move the value
    child.values << succ.values.shift
  else
    # Before:
    #
    # Root Node +---------------------------------------------+
    # Offsets   | 0                   7                       |
    # Children    |                   |
    #     child   v              succ v
    # Level 1   +------------------++----------------------------------------+
    # Offsets   | 0          4     || 0           4        6           9     |
    # Children    |          |        |           |        |           |
    #             v          v        v           v        v           v
    # Leaves    +---------++-------++----------++-------++----------++-------+
    # Values    | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index       0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # After:
    #
    # Root Node +---------------------------------------------+
    # Offsets   | 0                               11          |
    # Children    |                               |
    #     child   v                          succ v
    # Level 1   +------------------------------++----------------------------+
    # Offsets   | 0          4        7        || 0        2           5     |
    # Children    |          |        |           |        |           |
    #             v          v        v           v        v           v
    # Leaves    +---------++-------++----------++-------++----------++-------+
    # Values    | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index       0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # Adjust the offsets of the successor. The 2nd original offset
    # determines the delta for the parent node.
    succ.offsets.shift
    delta = succ.offsets.first
    succ.offsets.map! { |o| o - delta }
    # The additional child offset is the number of values the child held so
    # far. The offsets of a node are relative to the node's own first value
    # (every node in the diagram starts at 0), so the child's own start
    # offset must be subtracted from the successor's start offset. For
    # child_index 0 this subtracts 0.
    child.offsets << (@offsets[child_index + 1] - @offsets[child_index])
    # The parent node offset of the successor needs to be corrected by the
    # delta value.
    @offsets[child_index + 1] += delta
    # Move the child reference
    child.children << succ.children.shift
    child.children.last.parent = child
  end
end
|
883
|
+
|
884
|
+
# Move last element of predecessor node to child. For leaf nodes the element
# is a single value, for branch nodes it is the last child subtree of the
# predecessor. The offsets of this node, the predecessor and the child are
# adjusted accordingly.
# @param child_index [Integer] index of the child
def move_last_element_of_predecessor_to_child(child_index)
  pred = @children[child_index - 1]
  child = @children[child_index]

  if child.is_leaf?
    # The predecessor shrinks by one value, so the child starts one
    # position earlier within this node.
    @offsets[child_index] -= 1
    # Move the value
    child.values.unshift(pred.values.pop)
  else
    # Before:
    #
    # Root Node +---------------------------------------------+
    # Offsets   | 0                                        13 |
    # Children    |                                        |
    #      pred   v                                  child v
    # Level 1   +---------------------------------------++-------------------+
    # Offsets   | 0          4        7           11    || 0           3     |
    # Children    |          |        |           |        |           |
    #             v          v        v           v        v           v
    # Leaves    +---------++-------++----------++-------++----------++-------+
    # Values    | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index       0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # After:
    #
    # Root Node +---------------------------------------------+
    # Offsets   | 0                               11          |
    # Children    |                               |
    #      pred   v                         child v
    # Level 1   +------------------------------++----------------------------+
    # Offsets   | 0          4        7        || 0        2           5     |
    # Children    |          |        |           |        |           |
    #             v          v        v           v        v           v
    # Leaves    +---------++-------++----------++-------++----------++-------+
    # Values    | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index       0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # The number of values below the subtree that gets moved determines by
    # how much the child's start offset in this node moves to the left.
    delta = pred.children.last.values_count
    @offsets[child_index] -= delta
    # The predecessor no longer references the moved subtree.
    pred.offsets.pop
    # Adjust all the offsets of the child
    child.offsets.map! { |o| o + delta }
    # And prepend the 0 offset
    child.offsets.unshift(0)
    # Move the child reference
    child.children.unshift(pred.children.pop)
    child.children.first.parent = child
  end
end
|
940
|
+
|
941
|
+
# Merge the child at the given index with its successor. All content of the
# successor is appended to the child and the successor is removed from this
# node. If this node has a parent and has become smaller than min_size, the
# parent is asked to consolidate this node.
# @param child_index [Integer] index of the child that absorbs its
#        successor
def merge_child_with_next(child_index)
  c1 = @children[child_index]
  c2 = @children[child_index + 1]

  if c1.is_leaf?
    # Update the sibling links
    c1.next_sibling = c2.next_sibling
    c1.next_sibling.prev_sibling = c1 if c1.next_sibling

    c1.values += c2.values
    # Adjust the last_leaf reference in the @tree if c1 is now the last
    # sibling.
    @tree.last_leaf = c1 unless c1.next_sibling
  else
    # Before:
    #
    # Root Node +------------------------------------+
    # Offsets   | 0                               11 |
    # Children    |                               |
    #        c1   v                            c2 v
    # Level 1   +------------------------------++-------+
    # Offsets   | 0          4        7        || 0     |
    # Children    |          |        |           |
    #             v          v        v           v
    # Leaves    +---------++-------++----------++-------+
    # Values    | A B C D || E F G || H I J K  || L  M  |
    #
    # Index       0 1 2 3    4 5 6    7 8 9 10    11 12
    #
    # After:
    #
    # Root Node +---+
    # Offsets   | 0 |
    # Children    |
    #        c1   v
    # Level 1   +---------------------------------------+
    # Offsets   | 0          4        7           11    |
    # Children    |          |        |           |
    #             v          v        v           v
    # Leaves    +---------++-------++----------++-------+
    # Values    | A B C D || E F G || H I J K  || L  M  |
    #
    # Index       0 1 2 3    4 5 6    7 8 9 10    11 12
    #
    # The distance between the start offsets of c1 and c2 in this node is
    # the number of values below c1. The offsets of c2 must be shifted by
    # that amount to become relative to c1.
    delta = @offsets[child_index + 1] - @offsets[child_index]
    c1.offsets += c2.offsets.map { |o| o + delta }
    c2.children.each { |c| c.parent = c1 }
    c1.children += c2.children
  end

  # Remove the child successor from the node.
  @offsets.delete_at(child_index + 1)
  @children.delete_at(child_index + 1)

  # Removing a child may have left this node too small.
  @parent.consolidate_child_nodes(self) if @parent && size < min_size
end
|
998
|
+
|
999
|
+
end
|
1000
|
+
|
1001
|
+
end
|
1002
|
+
|