perobs 4.0.0 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/perobs.rb +1 -0
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +83 -12
- data/lib/perobs/BTreeBlob.rb +1 -1
- data/lib/perobs/BTreeDB.rb +2 -2
- data/lib/perobs/BTreeNode.rb +365 -85
- data/lib/perobs/BigArray.rb +267 -0
- data/lib/perobs/BigArrayNode.rb +998 -0
- data/lib/perobs/BigHash.rb +262 -0
- data/lib/perobs/BigTree.rb +184 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +143 -51
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +363 -203
- data/lib/perobs/FlatFileBlobHeader.rb +98 -54
- data/lib/perobs/FlatFileDB.rb +42 -20
- data/lib/perobs/Hash.rb +58 -13
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +38 -4
- data/lib/perobs/PersistentObjectCache.rb +53 -67
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +71 -32
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +5 -2
- data/test/BigArray_spec.rb +214 -0
- data/test/BigHash_spec.rb +144 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +63 -14
- data/test/Hash_spec.rb +1 -2
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +151 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +290 -199
- data/test/spec_helper.rb +9 -4
- metadata +47 -10
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,262 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigHash.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/BigTree'
|
30
|
+
require 'perobs/Array'
|
31
|
+
require 'perobs/FNV_Hash_1a_64'
|
32
|
+
|
33
|
+
module PEROBS
|
34
|
+
|
35
|
+
# The BigHash is similar to the Hash object in that it provides a simple
|
36
|
+
# hash functionality. The difference is that this class scales to much
|
37
|
+
# larger data sets essentially limited to the amount of space available on
|
38
|
+
# your backing store. The data is persisted immediately and uses
|
39
|
+
# transactions to keep the data consistent. It only provides a small
|
40
|
+
# subset of the methods provided by the native Hash class that make sense
|
41
|
+
# for giant data sets.
|
42
|
+
class BigHash < PEROBS::Object
|
43
|
+
|
44
|
+
# Internally this class uses BigTree to store the values by the hashed
|
45
|
+
# key. We are using a 64 bit hash space so collisions are fairly unlikely
|
46
|
+
# but not impossible. Therefore we have to store the original key with the
|
47
|
+
# value to ensure that we got the right value. The key and value are
|
48
|
+
# stored in an Entry object.
|
49
|
+
#
|
50
|
+
# In case we have a collision we need to store multiple values for the
|
51
|
+
# same hashed key. In that case we store the Entry objects for the same
|
52
|
+
# hashed key in a Collisions object instead of storing the Entry
|
53
|
+
# directly in the BigTree.
|
54
|
+
class Entry < PEROBS::Object
|
55
|
+
|
56
|
+
attr_persist :key, :value
|
57
|
+
|
58
|
+
def initialize(p, key, value)
|
59
|
+
super(p)
|
60
|
+
self.key = key
|
61
|
+
self.value = value
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
# Since the BigHash can also store PEROBS::Array values we need to
|
67
|
+
# introduce a new class so we can tell apart collisions from Arrays.
|
68
|
+
class Collisions < PEROBS::Array
|
69
|
+
end
|
70
|
+
|
71
|
+
attr_persist :btree, :entry_counter
|
72
|
+
|
73
|
+
# Create a new BigHash object.
|
74
|
+
# @param p [Handle] Store handle
|
75
|
+
def initialize(p)
|
76
|
+
super(p)
|
77
|
+
restore
|
78
|
+
self.btree = @store.new(PEROBS::BigTree)
|
79
|
+
self.entry_counter = 0
|
80
|
+
end
|
81
|
+
|
82
|
+
def restore
|
83
|
+
end
|
84
|
+
|
85
|
+
# Insert a value that is associated with the given key. If a value for
|
86
|
+
# this key already exists, the value will be overwritten with the newly
|
87
|
+
# provided value.
|
88
|
+
# @param key [Integer or String]
|
89
|
+
# @param value [Any PEROBS storable object]
|
90
|
+
def []=(key, value)
|
91
|
+
hashed_key = hash_key(key)
|
92
|
+
@store.transaction do
|
93
|
+
entry = @store.new(Entry, key, value)
|
94
|
+
|
95
|
+
if (existing_entry = @btree.get(hashed_key))
|
96
|
+
# There is already an existing entry for this hashed key.
|
97
|
+
if existing_entry.is_a?(Collisions)
|
98
|
+
# Find the right index to insert the new entry. If there is
|
99
|
+
# already an entry with the same key overwrite that entry.
|
100
|
+
index_to_insert = 0
|
101
|
+
overwrite = false
|
102
|
+
existing_entry.each do |ae|
|
103
|
+
if ae.key == key
|
104
|
+
overwrite = true
|
105
|
+
break
|
106
|
+
end
|
107
|
+
index_to_insert += 1
|
108
|
+
end
|
109
|
+
self.entry_counter += 1 unless overwrite
|
110
|
+
existing_entry[index_to_insert] = entry
|
111
|
+
elsif existing_entry.key == key
|
112
|
+
# The existing value is for the identical key. We can safely
|
113
|
+
# overwrite
|
114
|
+
@btree.insert(hashed_key, entry)
|
115
|
+
else
|
116
|
+
# There is a single existing entry, but for a different key. Create
|
117
|
+
# a new PEROBS::Array and store both entries.
|
118
|
+
array_entry = @store.new(Collisions)
|
119
|
+
array_entry << existing_entry
|
120
|
+
array_entry << entry
|
121
|
+
@btree.insert(hashed_key, array_entry)
|
122
|
+
self.entry_counter += 1
|
123
|
+
end
|
124
|
+
else
|
125
|
+
# No existing entry. Insert the new entry.
|
126
|
+
@btree.insert(hashed_key, entry)
|
127
|
+
self.entry_counter += 1
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# Look up the value stored for the given key.
# @param key [Integer or String]
# @return [Any PEROBS storable object] the stored value, or nil when the
#   hash holds no entry for the key
def [](key)
  stored = @btree.get(hash_key(key))
  return nil unless stored

  # The tree slot holds either a single entry or a list of entries that
  # share the same hashed key. Treat both as a list and compare the
  # original keys.
  (stored.is_a?(PEROBS::Array) ? stored : [stored]).each do |candidate|
    return candidate.value if candidate.key == key
  end

  nil
end
|
152
|
+
|
153
|
+
# Check if there is a value stored for the given key.
# @param key [Integer or String]
# @return [TrueClass or FalseClass]
def has_key?(key)
  stored = @btree.get(hash_key(key))
  return false unless stored

  # The tree slot holds either a single entry or a list of entries that
  # share the same hashed key; only an entry with the identical original
  # key counts as a match.
  (stored.is_a?(PEROBS::Array) ? stored : [stored]).each do |candidate|
    return true if candidate.key == key
  end

  false
end
|
172
|
+
|
173
|
+
# Delete the entry for the given key and return its value. Return nil if no
# matching entry exists.
# @param key [Integer or String]
# @return [Object] Value of the deleted entry, or nil if the key is unknown
def delete(key)
  hashed_key = hash_key(key)
  return nil unless (entry = @btree.get(hashed_key))

  # The result is carried out of the transaction block in a local, the same
  # way BigTree#remove does it, instead of returning from inside the block.
  deleted_value = nil

  if entry.is_a?(PEROBS::Array)
    # Several entries share this hashed key. Locate the one whose original
    # key matches.
    position = nil
    entry.each_with_index do |ae, i|
      if ae.key == key
        position = i
        break
      end
    end
    return nil unless position

    @store.transaction do
      deleted_value = entry.delete_at(position).value
      self.entry_counter -= 1
    end
  elsif entry.key == key
    # A single entry is stored directly in the tree. It must be removed
    # from the tree and the counter adjusted; otherwise the key would stay
    # visible to [] and has_key? and length would be wrong.
    @store.transaction do
      deleted_value = entry.value
      @btree.remove(hashed_key)
      self.entry_counter -= 1
    end
  end

  deleted_value
end
|
196
|
+
|
197
|
+
# Return the number of entries stored in the hash.
|
198
|
+
# @return [Integer]
|
199
|
+
def length
|
200
|
+
@entry_counter
|
201
|
+
end
|
202
|
+
|
203
|
+
alias size length
|
204
|
+
|
205
|
+
# Return true if the hash is empty, false otherwise.
|
206
|
+
# @return [TrueClass, FalseClass]
|
207
|
+
def empty?
|
208
|
+
@entry_counter == 0
|
209
|
+
end
|
210
|
+
|
211
|
+
# Calls the given block for each key/value pair.
|
212
|
+
# @yield(key, value)
|
213
|
+
def each(&block)
|
214
|
+
@btree.each do |index, entry|
|
215
|
+
if entry.is_a?(Collisions)
|
216
|
+
break unless entry.each do |c_entry|
|
217
|
+
yield(c_entry.key, c_entry.value)
|
218
|
+
end
|
219
|
+
else
|
220
|
+
yield(entry.key, entry.value)
|
221
|
+
end
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
# This is mostly intended for debugging as the result can be very big.
|
226
|
+
# It returns an Array of keys stored in the hash.
|
227
|
+
# @return [Array] A list of all keys
|
228
|
+
def keys
|
229
|
+
ks = []
|
230
|
+
each { |k, v| ks << k }
|
231
|
+
ks
|
232
|
+
end
|
233
|
+
|
234
|
+
# Check if the data structure contains any errors.
|
235
|
+
# @return [Boolean] true if no errors were found, false otherwise
|
236
|
+
def check
|
237
|
+
return false unless @btree.check
|
238
|
+
|
239
|
+
i = 0
|
240
|
+
each do |k, v|
|
241
|
+
i += 1
|
242
|
+
end
|
243
|
+
|
244
|
+
unless @entry_counter == i
|
245
|
+
PEROBS.log.error "BigHash contains #{i} values but entry counter " +
|
246
|
+
"is #{@entry_counter}"
|
247
|
+
return false
|
248
|
+
end
|
249
|
+
|
250
|
+
true
|
251
|
+
end
|
252
|
+
|
253
|
+
private
|
254
|
+
|
255
|
+
def hash_key(key)
|
256
|
+
FNV_Hash_1a_64::digest(key)
|
257
|
+
end
|
258
|
+
|
259
|
+
end
|
260
|
+
|
261
|
+
end
|
262
|
+
|
@@ -0,0 +1,184 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigTree.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/BigTreeNode'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The BigTree class implements a BTree as a PEROBS object. It allows to
|
34
|
+
# manage huge amounts of data in a reasonably efficient way. The number of
|
35
|
+
# entries is limited by the space on the backing store, not the main
|
36
|
+
# memory. Entries are addressed by an Integer key.
|
37
|
+
class BigTree < PEROBS::Object
|
38
|
+
|
39
|
+
# Container for the figures gathered by the statistics method: number of
# leaf and branch nodes and the minimum and maximum depth. Assigned
# directly instead of subclassing an anonymous Struct, which would add a
# superfluous class to the ancestor chain.
Stats = Struct.new(:leaf_nodes, :branch_nodes, :min_depth, :max_depth)
|
42
|
+
|
43
|
+
attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter
|
44
|
+
|
45
|
+
# Internal constructor. Use Store.new() instead.
|
46
|
+
# @param p [Handle]
|
47
|
+
# @param node_size [Integer] The size of the tree nodes. This determines
|
48
|
+
# how many entries must be read/written for each operation.
|
49
|
+
def initialize(p, node_size = 127)
|
50
|
+
super(p)
|
51
|
+
unless node_size > 2
|
52
|
+
PEROBS.log.fatal "Node size (#{node_size}) must be larger than 2"
|
53
|
+
end
|
54
|
+
attr_init(:node_size, node_size)
|
55
|
+
clear unless instance_variable_defined?('@root')
|
56
|
+
end
|
57
|
+
|
58
|
+
# Remove all entries from the BigTree.
|
59
|
+
def clear
|
60
|
+
self.root = self.first_leaf = self.last_leaf =
|
61
|
+
@store.new(BigTreeNode, myself, true)
|
62
|
+
self.entry_counter = 0
|
63
|
+
end
|
64
|
+
|
65
|
+
# Insert a new value into the tree using the key as a unique index. If the
|
66
|
+
# key already exists the old value will be overwritten.
|
67
|
+
# @param key [Integer] Unique key
|
68
|
+
# @param value [Integer] value
|
69
|
+
def insert(key, value)
|
70
|
+
@store.transaction do
|
71
|
+
@root.insert(key, value)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Retrieve the value associated with the given key. If no entry was found,
|
76
|
+
# return nil.
|
77
|
+
# @param key [Integer] Unique key
|
78
|
+
# @return [Integer or nil] found value or nil
|
79
|
+
def get(key)
|
80
|
+
@root.get(key)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Return the node chain from the root to the leaf node storing the
|
84
|
+
# key/value pair.
|
85
|
+
# @param key [Integer] key to search for
|
86
|
+
# @return [Array of BigTreeNode] node list (may be empty)
|
87
|
+
def node_chain(key)
|
88
|
+
@root.node_chain(key)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Check if there is an entry for the given key.
|
92
|
+
# @param key [Integer] Unique key
|
93
|
+
# @return [Boolean] True if key is present, false otherwise.
|
94
|
+
def has_key?(key)
|
95
|
+
@root.has_key?(key)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Find and remove the value associated with the given key. If no entry was
|
99
|
+
# found, return nil, otherwise the found value.
|
100
|
+
# @param key [Integer] Unique key
|
101
|
+
# @return [Integer or nil] found value or nil
|
102
|
+
def remove(key)
|
103
|
+
removed_value = nil
|
104
|
+
|
105
|
+
@store.transaction do
|
106
|
+
removed_value = @root.remove(key)
|
107
|
+
end
|
108
|
+
|
109
|
+
removed_value
|
110
|
+
end
|
111
|
+
|
112
|
+
# Remove every entry for which the given block returns a true value.
# Tuned for large bulk deletes: the tree is cleared and then rebuilt from
# the entries that are to be kept. When only a few entries are dropped the
# cost of the rebuild is comparatively high.
# @yield [key, value]
def delete_if
  previous_root = @root
  clear
  previous_root.each do |key, value|
    insert(key, value) unless yield(key, value)
  end
end
|
126
|
+
|
127
|
+
# @return [Integer] The number of entries stored in the tree.
|
128
|
+
def length
|
129
|
+
@entry_counter
|
130
|
+
end
|
131
|
+
|
132
|
+
# Return true if the BigTree has no stored entries.
|
133
|
+
def empty?
|
134
|
+
@entry_counter == 0
|
135
|
+
end
|
136
|
+
|
137
|
+
# Iterate over all entries in the tree. Entries are always sorted by the
|
138
|
+
# key.
|
139
|
+
# @yield [key, value]
|
140
|
+
def each(&block)
|
141
|
+
node = @first_leaf
|
142
|
+
while node
|
143
|
+
node.each_element(&block)
|
144
|
+
node = node.next_sibling
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
# Iterate over all entries in the tree in reverse order. Entries are
|
149
|
+
# always sorted by the key.
|
150
|
+
# @yield [key, value]
|
151
|
+
def reverse_each(&block)
|
152
|
+
node = @last_leaf
|
153
|
+
while node
|
154
|
+
node.reverse_each_element(&block)
|
155
|
+
node = node.prev_sibling
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
|
160
|
+
# @return [String] Human-readable form of the tree.
|
161
|
+
def to_s
|
162
|
+
@root.to_s
|
163
|
+
end
|
164
|
+
|
165
|
+
# Check if the tree file contains any errors.
|
166
|
+
# @return [Boolean] true if no errors were found, false otherwise
|
167
|
+
def check(&block)
|
168
|
+
@root.check(&block)
|
169
|
+
end
|
170
|
+
|
171
|
+
# Gather some statistics regarding the tree structure.
|
172
|
+
# @return [Stats] Structs with gathered data
|
173
|
+
def statistics
|
174
|
+
stats = Stats.new(0, 0, nil, nil)
|
175
|
+
@root.statistics(stats)
|
176
|
+
stats
|
177
|
+
end
|
178
|
+
|
179
|
+
private
|
180
|
+
|
181
|
+
end
|
182
|
+
|
183
|
+
end
|
184
|
+
|
@@ -0,0 +1,873 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigTreeNode.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/Array'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The BigTreeNode class provides the BTree nodes for the BigTree objects.
|
34
|
+
# A node can either be a branch node or a leaf node. Branch nodes don't
|
35
|
+
# store values, only references to child nodes. Leaf nodes don't have child
|
36
|
+
# nodes but store the actual values. All nodes store a list of keys that are
|
37
|
+
# used to navigate the tree and find the values. A key is either directly
|
38
|
+
# associated with a value or determines the lower key boundary for the
|
39
|
+
# following child node.
|
40
|
+
class BigTreeNode < PEROBS::Object
|
41
|
+
|
42
|
+
attr_persist :tree, :parent, :keys, :values, :children,
|
43
|
+
:prev_sibling, :next_sibling
|
44
|
+
|
45
|
+
# Internal constructor. Use Store.new(BigTreeNode, ...) instead.
|
46
|
+
# @param p [Handle]
|
47
|
+
# @param tree [BigTree] The tree this node should belong to
|
48
|
+
# @param is_leaf [Boolean] True if a leaf node should be created, false
|
49
|
+
# for a branch node.
|
50
|
+
# @param parent [BigTreeNode] Parent node
|
51
|
+
# @param prev_sibling [BigTreeNode] Previous sibling
|
52
|
+
# @param next_sibling [BigTreeNode] Next sibling
|
53
|
+
def initialize(p, tree, is_leaf, parent = nil, prev_sibling = nil,
|
54
|
+
next_sibling = nil)
|
55
|
+
super(p)
|
56
|
+
self.tree = tree
|
57
|
+
self.parent = parent
|
58
|
+
self.keys = @store.new(PEROBS::Array)
|
59
|
+
|
60
|
+
if is_leaf
|
61
|
+
# Create a new leaf node. It stores values and has no children.
|
62
|
+
self.values = @store.new(PEROBS::Array)
|
63
|
+
self.children = nil
|
64
|
+
else
|
65
|
+
# Create a new tree node. It doesn't store values and can have child
|
66
|
+
# nodes.
|
67
|
+
self.children = @store.new(PEROBS::Array)
|
68
|
+
self.values = nil
|
69
|
+
end
|
70
|
+
# Link the neighboring siblings to the newly inserted node. If the node
|
71
|
+
# is a leaf node and has no sibling on a side we also must register it
|
72
|
+
# as first or last leaf with the BigTree object.
|
73
|
+
if (self.prev_sibling = prev_sibling)
|
74
|
+
@prev_sibling.next_sibling = myself
|
75
|
+
elsif is_leaf?
|
76
|
+
@tree.first_leaf = myself
|
77
|
+
end
|
78
|
+
if (self.next_sibling = next_sibling)
|
79
|
+
@next_sibling.prev_sibling = myself
|
80
|
+
elsif is_leaf?
|
81
|
+
@tree.last_leaf = myself
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# @return [Boolean] True if this is a leaf node, false otherwise.
|
86
|
+
def is_leaf?
|
87
|
+
@children.nil?
|
88
|
+
end
|
89
|
+
|
90
|
+
# Insert or replace the given value by using the key as unique address.
|
91
|
+
# @param key [Integer] Unique key to retrieve the value
|
92
|
+
# @param value [Integer] value to insert
|
93
|
+
def insert(key, value)
|
94
|
+
node = myself
|
95
|
+
|
96
|
+
# Traverse the tree to find the right node to add or replace the value.
|
97
|
+
while node do
|
98
|
+
# All nodes that we find on the way that are full will be split into
|
99
|
+
# two half-full nodes.
|
100
|
+
if node.keys.size >= @tree.node_size
|
101
|
+
node = node.split_node
|
102
|
+
end
|
103
|
+
|
104
|
+
# Once we have reached a leaf node we can insert or replace the value.
|
105
|
+
if node.is_leaf?
|
106
|
+
return node.insert_element(key, value)
|
107
|
+
else
|
108
|
+
# Descend into the right child node to add the value to.
|
109
|
+
node = node.children[node.search_key_index(key)]
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
PEROBS.log.fatal "Could not find proper node to insert into"
|
114
|
+
end
|
115
|
+
|
116
|
+
# Return the value that matches the given key or return nil if the key is
|
117
|
+
# unknown.
|
118
|
+
# @param key [Integer] key to search for
|
119
|
+
# @return [Integer or nil] value that matches the key
|
120
|
+
def get(key)
|
121
|
+
node = self
|
122
|
+
|
123
|
+
while node do
|
124
|
+
# Find index of the entry that best fits the key.
|
125
|
+
i = node.search_key_index(key)
|
126
|
+
if node.is_leaf?
|
127
|
+
# This is a leaf node. Check if there is an exact match for the
|
128
|
+
# given key and return the corresponding value or nil.
|
129
|
+
return node.keys[i] == key ? node.values[i] : nil
|
130
|
+
end
|
131
|
+
|
132
|
+
# Descend into the right child node to continue the search.
|
133
|
+
node = node.children[i]
|
134
|
+
end
|
135
|
+
|
136
|
+
PEROBS.log.fatal "Could not find proper node to get from while " +
|
137
|
+
"looking for key #{key}"
|
138
|
+
end
|
139
|
+
|
140
|
+
# Return the node chain from this node down to the leaf node storing the
# key/value pair. Each node on the path appears exactly once, starting with
# this node and ending with the leaf.
# @param key [Integer] key to search for
# @return [Array of BigTreeNode] node list (empty if the key is not stored)
def node_chain(key)
  node = myself
  list = []

  while node do
    # Record every visited node once, including the final leaf.
    list << node

    # Find index of the entry that best fits the key.
    i = node.search_key_index(key)
    if node.is_leaf?
      # This is a leaf node. The chain is only valid if the leaf holds an
      # exact match for the given key.
      return node.keys[i] == key ? list : []
    end

    # Descend into the right child node to continue the search.
    node = node.children[i]
  end

  PEROBS.log.fatal "Could not find node chain for key #{key}"
end
|
165
|
+
|
166
|
+
# Return if given key is stored in the node.
|
167
|
+
# @param key [Integer] key to search for
|
168
|
+
# @return [Boolean] True if key was found, false otherwise
|
169
|
+
def has_key?(key)
|
170
|
+
node = self
|
171
|
+
|
172
|
+
while node do
|
173
|
+
# Find index of the entry that best fits the key.
|
174
|
+
i = node.search_key_index(key)
|
175
|
+
if node.is_leaf?
|
176
|
+
# This is a leaf node. Check if there is an exact match for the
|
177
|
+
# given key and return the corresponding value or nil.
|
178
|
+
return node.keys[i] == key
|
179
|
+
end
|
180
|
+
|
181
|
+
# Descend into the right child node to continue the search.
|
182
|
+
node = node.children[i]
|
183
|
+
end
|
184
|
+
|
185
|
+
PEROBS.log.fatal "Could not find proper node to get from while " +
|
186
|
+
"looking for key #{key}"
|
187
|
+
end
|
188
|
+
|
189
|
+
# Return the value that matches the given key and remove the value from
|
190
|
+
# the tree. Return nil if the key is unknown.
|
191
|
+
# @param key [Integer] key to search for
|
192
|
+
# @return [Object] value that matches the key
|
193
|
+
def remove(key)
|
194
|
+
node = self
|
195
|
+
|
196
|
+
while node do
|
197
|
+
# Find index of the entry that best fits the key.
|
198
|
+
i = node.search_key_index(key)
|
199
|
+
if node.is_leaf?
|
200
|
+
# This is a leaf node. Check if there is an exact match for the
|
201
|
+
# given key and return the corresponding value or nil.
|
202
|
+
if node.keys[i] == key
|
203
|
+
@tree.entry_counter -= 1
|
204
|
+
return node.remove_element(i)
|
205
|
+
else
|
206
|
+
return nil
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
# Descend into the right child node to continue the search.
|
211
|
+
node = node.children[i]
|
212
|
+
end
|
213
|
+
|
214
|
+
PEROBS.log.fatal 'Could not find proper node to remove from'
|
215
|
+
end
|
216
|
+
|
217
|
+
# Iterate over all the key/value pairs in this node and all sub-nodes.
|
218
|
+
# @yield [key, value]
|
219
|
+
def each
|
220
|
+
traverse do |node, position, stack|
|
221
|
+
if node.is_leaf? && position < node.keys.size
|
222
|
+
yield(node.keys[position], node.values[position])
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
# Iterate over all the key/value pairs of the node.
|
228
|
+
# @yield [key, value]
|
229
|
+
def each_element
|
230
|
+
return unless is_leaf?
|
231
|
+
|
232
|
+
0.upto(@keys.length - 1) do |i|
|
233
|
+
yield(@keys[i], @values[i])
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
# Iterate over all the key/value pairs of the node in reverse order.
|
238
|
+
# @yield [key, value]
|
239
|
+
def reverse_each_element
|
240
|
+
return unless is_leaf?
|
241
|
+
|
242
|
+
(@keys.length - 1).downto(0) do |i|
|
243
|
+
yield(@keys[i], @values[i])
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
# Check consistency of the node and all subsequent nodes. In case an error
# is found, a message is logged via the offending node's error method and
# false is returned immediately.
#
# The checks run inside a traverse block that receives the current node, a
# position and the stack of nodes above it. At position 0 the structural
# checks of the node itself are done; at later positions the entry or child
# with index 'position - 1' is checked.
#
# If a block is given, it is called with the key and value of each leaf
# entry; a false or nil result aborts the check with false.
# @yield [key, value]
# @return [Boolean] true if tree has no errors
def check
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      if node.parent
        # After a split the nodes will only have half the maximum keys.
        # For branch nodes one of the split nodes will have even 1 key
        # less as this will become the branch key in a parent node.
        if node.keys.size < min_keys - (node.is_leaf? ? 0 : 1)
          node.error "BigTree node #{node._id} has too few keys"
          return false
        end
      end

      if node.keys.size > @tree.node_size
        node.error "BigTree node must not have more then " +
          "#{@tree.node_size} keys, but has #{node.keys.size} keys"
        return false
      end

      last_key = nil
      node.keys.each do |key|
        if last_key && key < last_key
          node.error "Keys are not increasing monotoneously: " +
            "#{node.keys.inspect}"
          return false
        end
        last_key = key
      end

      if node.is_leaf?
        # The first leaf seen sets the reference depth; all others must
        # match it.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end
        if node.prev_sibling.nil?
          if @tree.first_leaf != node
            node.error "Leaf node #{node._id} has no previous sibling " +
              "but is not the first leaf of the tree"
            return false
          end
        elsif node.prev_sibling.next_sibling != node
          node.error "next_sibling of previous sibling does not point to " +
            "this node"
          return false
        end
        if node.next_sibling.nil?
          if @tree.last_leaf != node
            node.error "Leaf node #{node._id} has no next sibling " +
              "but is not the last leaf of the tree"
            return false
          end
        elsif node.next_sibling.prev_sibling != node
          node.error "previous_sibling of next sibling does not point to " +
            "this node"
          return false
        end
        unless node.keys.size == node.values.size
          node.error "Key count (#{node.keys.size}) and value " +
            "count (#{node.values.size}) don't match"
          return false
        end
        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        if node.values
          node.error "values must be nil for a branch node"
          return false
        end
        unless node.children.size == node.keys.size + 1
          node.error "Key count (#{node.keys.size}) must be one " +
            "less than children count (#{node.children.size})"
          return false
        end
        node.children.each_with_index do |child, i|
          unless child.is_a?(BigTreeNode)
            node.error "Child #{i} is of class #{child.class} " +
              "instead of BigTreeNode"
            return false
          end
          unless child.parent.is_a?(BigTreeNode)
            # Report the class of the parent reference, which is the object
            # that failed the test, not the class of the child.
            node.error "Parent reference of child #{i} is of class " +
              "#{child.parent.class} instead of BigTreeNode"
            return false
          end
          if child == node
            node.error "Child #{i} point to self"
            return false
          end
          if stack.include?(child)
            node.error "Child #{i} points to ancester node"
            return false
          end
          unless child.parent == node
            node.error "Child #{i} does not have parent pointing " +
              "to this node"
            return false
          end
          if i > 0
            unless node.children[i - 1].next_sibling == child
              node.error "next_sibling of node " +
                "#{node.children[i - 1]._id} " +
                "must point to node #{child._id}"
              return false
            end
          end
          if i < node.children.length - 1
            unless child == node.children[i + 1].prev_sibling
              node.error "prev_sibling of node " +
                "#{node.children[i + 1]._id} " +
                "must point to node #{child._id}"
              return false
            end
          end
        end
      end
    elsif position <= node.keys.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf?
        if block_given?
          # If a block was given, call this block with the key and value.
          return false unless yield(node.keys[index], node.values[index])
        end
      else
        # The branch key at 'index' separates the two neighboring children:
        # everything in the left child must be smaller, everything in the
        # right child must be larger or equal.
        unless node.children[index].keys.last < node.keys[index]
          node.error "Child #{node.children[index]._id} " +
            "has too large key #{node.children[index].keys.last}. " +
            "Must be smaller than #{node.keys[index]}."
          return false
        end
        unless node.children[position].keys.first >= node.keys[index]
          node.error "Child #{node.children[position]._id} " +
            "has too small key #{node.children[position].keys.first}. " +
            "Must be larger than or equal to #{node.keys[index]}."
          return false
        end
      end
    end
  end

  true
end
|
402
|
+
|
403
|
+
# @return [String] Human-readable form of the sub-tree.
|
404
|
+
def to_s
|
405
|
+
str = ''
|
406
|
+
|
407
|
+
traverse do |node, position, stack|
|
408
|
+
if position == 0
|
409
|
+
begin
|
410
|
+
str += "#{node.parent ? node.parent.tree_prefix + ' +' : 'o'}" +
|
411
|
+
"#{node.tree_branch_mark}-" +
|
412
|
+
"#{node.keys.first.nil? ? '--' : 'v-'}#{node.tree_summary}\n"
|
413
|
+
rescue => e
|
414
|
+
str += "@@@@@@@@@@: #{e.message}\n"
|
415
|
+
end
|
416
|
+
else
|
417
|
+
begin
|
418
|
+
if node.is_leaf?
|
419
|
+
if node.keys[position - 1]
|
420
|
+
str += "#{node.tree_prefix} |" +
|
421
|
+
"[#{node.keys[position - 1]}, " +
|
422
|
+
"#{node.values[position - 1]}]\n"
|
423
|
+
end
|
424
|
+
else
|
425
|
+
if node.keys[position - 1]
|
426
|
+
str += "#{node.tree_prefix} #{node.keys[position - 1]}\n"
|
427
|
+
end
|
428
|
+
end
|
429
|
+
rescue => e
|
430
|
+
str += "@@@@@@@@@@: #{e.message}\n"
|
431
|
+
end
|
432
|
+
end
|
433
|
+
end
|
434
|
+
|
435
|
+
str
|
436
|
+
end
|
437
|
+
|
438
|
+
# Split this node into two. The upper half of the elements moves into a
# newly created sibling node; this node keeps the lower half. If this node
# has no parent, a new parent is created first and registered as the tree
# root.
# @return [BigTreeNode] common parent of the two nodes
def split_node
  unless @parent
    # No parent yet: create one, hook this node in as its first child and
    # make it the tree root.
    self.parent = @store.new(BigTreeNode, @tree, false)
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The new sibling receives the second half of this node's content.
  upper = @store.new(BigTreeNode, @tree, is_leaf?, @parent, myself,
                     @next_sibling)
  # Index of the middle element whose key is pushed up into the parent. The
  # node size must be an uneven number.
  pivot = @keys.size / 2
  @parent.insert_element(@keys[pivot], upper)

  if is_leaf?
    # Leaves hand over keys and values from the pivot onwards.
    tail = pivot..-1
    upper.keys += @keys[tail]
    upper.values += @values[tail]
    @values.slice!(tail)
  else
    # Branch nodes hand over the keys and children after the pivot and
    # re-parent the moved children to the new sibling.
    tail = (pivot + 1)..-1
    upper.keys += @keys[tail]
    upper.children += @children[tail]
    upper.children.each { |child| child.parent = upper }
    @children.slice!(tail)
  end
  # In both cases this node drops every key from the pivot onwards.
  @keys.slice!(pivot..-1)

  @parent
end
|
482
|
+
|
483
|
+
# Insert the given value or child into the current node using the key as
# index. An existing entry with the same key is overwritten.
# @param key [Integer] key to address the value or child
# @param child_or_value [Integer or BigTreeNode] value or BigTreeNode
# @return [Object] the value of the last statement executed: the stored
#   child_or_value on overwrite, the updated entry counter for a new leaf
#   entry, or the result of the children insert for a new branch entry.
#   NOTE(review): earlier documentation promised a Boolean (true if new,
#   false if overwritten); the code does not return one - confirm what
#   callers expect before changing it.
def insert_element(key, child_or_value)
  if @keys.size >= @tree.node_size
    PEROBS.log.fatal "Cannot insert into a full BigTreeNode: #{@keys.size}"
  end

  slot = search_key_index(key)
  if @keys[slot] == key
    # The key is already present: replace the stored entry.
    @keys[slot] = key
    return is_leaf? ? (@values[slot] = child_or_value) :
                      (@children[slot + 1] = child_or_value)
  end

  # The key is new: open a slot for it.
  @keys.insert(slot, key)
  if is_leaf?
    @values.insert(slot, child_or_value)
    @tree.entry_counter += 1
  else
    @children.insert(slot + 1, child_or_value)
  end
end
|
514
|
+
|
515
|
+
# Remove the element from a leaf node at the given index. If the node ends
# up with fewer than min_keys keys it borrows from, or is merged with, an
# adjacent sibling that has the same parent.
# @param index [Integer] The index of the entry to be removed
# @return [Object] The removed value
def remove_element(index)
  removed_key = @keys.delete_at(index)
  unless removed_key
    PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " \
                     "@#{@_id}"
  end
  # Removing the first key changes this node's lead key, so the matching
  # branch key further up has to follow.
  update_branch_key(removed_key) if index == 0

  removed_value = @values.delete_at(index)
  return removed_value unless @keys.length < min_keys

  # The node is underfull. Prefer the previous sibling, then the next one.
  if @prev_sibling && @prev_sibling.parent == @parent
    borrow_from_previous_sibling(@prev_sibling) ||
      @prev_sibling.merge_with_leaf_node(myself)
  elsif @next_sibling && @next_sibling.parent == @parent
    borrow_from_next_sibling(@next_sibling) ||
      merge_with_leaf_node(@next_sibling)
  elsif @parent
    PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
  end

  # A merge may have invalidated this node. After this method has been
  # called this copy of the node should no longer be used.
  removed_value
end
|
544
|
+
|
545
|
+
# Remove the specified node from this branch node, unlink it from its
# siblings and rebalance this node if it became too small.
# @param node [BigTreeNode] The child to remove
def remove_child(node)
  unless (pos = search_node_index(node))
    PEROBS.log.fatal "Cannot remove child #{node._id} from node #{@_id}"
  end

  if pos == 0
    # The branch key of the first child lives in an ancestor node, so the
    # first key of this node is dropped and the ancestor key is updated.
    update_branch_key(@keys.shift)
  else
    # Every other child has its branch key right in front of it.
    @keys.delete_at(pos - 1)
  end

  # Drop the link to the child node.
  detached = @children.delete_at(pos)
  # Keep the first/last leaf references of the BigTree object valid.
  @tree.first_leaf = detached.next_sibling if detached == @tree.first_leaf
  @tree.last_leaf = detached.prev_sibling if detached == @tree.last_leaf
  # Close the gap in the sibling chain.
  before = detached.prev_sibling
  after = detached.next_sibling
  before.next_sibling = after if before
  after.prev_sibling = before if after

  if @keys.length < min_keys
    # The node has become too small. Try borrowing from an adjacent sibling
    # with the same parent, or merge with it.
    if @prev_sibling && @prev_sibling.parent == @parent
      borrow_from_previous_sibling(@prev_sibling) ||
        @prev_sibling.merge_with_branch_node(myself)
    elsif @next_sibling && @next_sibling.parent == @parent
      borrow_from_next_sibling(@next_sibling) ||
        merge_with_branch_node(@next_sibling)
    end
  end

  return unless @parent.nil? && @children.length <= 1

  # This node has no parent and at most one child left: promote that child
  # to be the new root of the tree.
  promoted = @children.first
  promoted.parent = nil
  @tree.root = promoted
end
|
592
|
+
|
593
|
+
# Append all keys and values of the given leaf node to this node and then
# have the given node removed from its parent.
# @param node [BigTreeNode] leaf node to absorb
def merge_with_leaf_node(node)
  combined = @keys.length + node.keys.length
  if combined > @tree.node_size
    PEROBS.log.fatal "Leaf nodes are too big to merge"
  end

  self.keys = self.keys + node.keys
  self.values = self.values + node.values

  node.parent.remove_child(node)
end
|
603
|
+
|
604
|
+
# Absorb the given branch node into this node: pull down the parent key
# that precedes the given node, append the node's keys and children
# (re-parenting the children to this node) and then have the given node
# removed from its parent.
# @param node [BigTreeNode] branch node to absorb
def merge_with_branch_node(node)
  # One extra slot is needed for the key pulled down from the parent.
  if @keys.length + 1 + node.keys.length > @tree.node_size
    PEROBS.log.fatal "Branch nodes are too big to merge"
  end

  separator_index = @parent.search_node_index(node) - 1
  self.keys << @parent.keys[separator_index]
  self.keys = self.keys + node.keys
  node.children.each { |child| child.parent = myself }
  self.children = self.children + node.children

  node.parent.remove_child(node)
end
|
617
|
+
|
618
|
+
# Search the keys of the node for the given key. The result is either the
# index of an exact match (plus one for branch nodes) or the index of the
# position where the given key would have to be inserted.
# @param key [Integer] key to search for
# @return [Integer] Index of the matching key or the insert position.
def search_key_index(key)
  # Nothing to search in an empty key list.
  return 0 if @keys.empty?

  # Keys are unique and always sorted, so a binary search finds the slot.
  low = pivot = 0
  high = @keys.size - 1
  until low > high
    # The pivot always sits in the middle of the remaining window.
    pivot = low + (high - low) / 2

    if key < @keys[pivot]
      # The key is smaller than the pivot element: continue in the lower
      # part of the window.
      high = pivot - 1
    elsif key > @keys[pivot]
      # The key is larger than the pivot element: continue in the upper
      # part of the window.
      low = pivot + 1
    else
      # Exact match. Leaf nodes return the found index. Branch nodes add
      # one since the child to the right of the key is the one to follow.
      return is_leaf? ? pivot : pivot + 1
    end
  end

  # No exact match. Return the index of the first key that is larger than
  # the given key, which is where an insert would have to happen.
  @keys[pivot] < key ? pivot + 1 : pivot
end
|
655
|
+
|
656
|
+
# Determine the index of the given child node in the children list by
# looking up the node's first key.
# @param node [BigTreeNode] child node to look for
# @return [Integer] index of the node in the children list
# @raise [RuntimeError] if the child at the computed index is a different
#   node
def search_node_index(node)
  position = search_key_index(node.keys.first)
  return position if @children[position] == node

  raise RuntimeError, "Child at index #{position} is not the requested node"
end
|
664
|
+
|
665
|
+
# Generic, non-recursive tree iterator. It yields before it descends into a
# child node and again afterwards (which is the same moment as before the
# descent into the next child). It yields the node, the position and the
# stack of pending [node, position] entries.
# @yield [node, position, stack]
def traverse
  # Explicit stack of [node, position] pairs that still have to be visited.
  pending = [ [ self, 0 ] ]

  until pending.empty?
    node, position = pending.pop

    # Hand the current state to the caller. Position 0 means the node has
    # just been entered and the first child comes next. Position 1 is after
    # the 1st child and before the 2nd one, and so on. The last position is
    # reached after the last child was processed, right before returning to
    # the parent node.
    yield(node, position, pending)

    next unless position <= node.keys.size

    # Come back to this node at the next position once the child (if any)
    # has been handled.
    pending.push([ node, position + 1 ])

    # Branch nodes descend into the child at this position, starting at
    # position 0 of that child.
    if !node.is_leaf? && node.children[position]
      pending.push([ node.children[position], 0 ])
    end
  end
end
|
699
|
+
|
700
|
+
# Gather some statistics about the node and all sub nodes. Counts leaf and
# branch nodes and records the smallest and largest leaf depth seen.
# @param stats [Stats] Data structure that stores the gathered data. Must
#   respond to leaf_nodes, branch_nodes, min_depth and max_depth (readers
#   and writers); min_depth/max_depth may start out as nil.
def statistics(stats)
  traverse do |node, position, stack|
    # Every node is yielded several times; only count it on first entry.
    next unless position == 0

    unless node.is_leaf?
      stats.branch_nodes += 1
      next
    end

    stats.leaf_nodes += 1
    # The traversal stack holds one pending entry per ancestor at this
    # point, so the leaf's depth is the stack size plus the leaf itself.
    depth = stack.size + 1
    # min_depth tracks the shallowest leaf, max_depth the deepest one. The
    # previous comparisons were inverted and swapped the two values.
    if stats.min_depth.nil? || depth < stats.min_depth
      stats.min_depth = depth
    end
    if stats.max_depth.nil? || depth > stats.max_depth
      stats.max_depth = depth
    end
  end
end
|
720
|
+
|
721
|
+
# Return the decoration that marks the tree structure of this node for the
# inspection method. One segment is added per ancestor level, built from
# this node upwards; the top-level node contributes nothing.
# @return [String] prefix for lines that belong to this node
def tree_prefix
  prefix = ''
  current = self

  while current
    owner = current.parent
    # Don't add lines for the top-level.
    break unless owner

    # The last child of a node gets a different segment than the others.
    last_in_parent = owner.children.last == current
    prefix = (last_in_parent ? ' ' : ' |') + prefix
    current = owner
  end

  prefix
end
|
742
|
+
|
743
|
+
# Branch node decoration for the inspection method.
# @return [String] '-' for nodes that have a parent, '' otherwise
def tree_branch_mark
  @parent ? '-' : ''
end
|
748
|
+
|
749
|
+
# Text for the node line for the inspection method. Lists the ID of this
# node followed by the IDs of its parent (^), previous sibling (<) and next
# sibling (>) where present. If an ID cannot be read, '@' is shown in its
# place.
# @return [String] summary text
def tree_summary
  summary = " @#{@_id}"

  links = [ [ '^', @parent ], [ '<', @prev_sibling ], [ '>', @next_sibling ] ]
  links.each do |mark, ref|
    next unless ref

    summary += begin
      " #{mark}#{ref._id}"
    rescue
      " #{mark}@"
    end
  end

  summary
end
|
776
|
+
|
777
|
+
# Print and log an error message for the node. The message is extended with
# the node ID and the text form of the whole tree, written to $stderr and
# passed to the PEROBS error log.
# @param msg [String] description of the problem
def error(msg)
  report = "Error in BigTree node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.error report
end
|
783
|
+
|
784
|
+
private
|
785
|
+
|
786
|
+
# Key count below which remove_element and remove_child rebalance a node:
# half of the tree's node size.
# @return [Integer]
def min_keys
  capacity = @tree.node_size
  capacity / 2
end
|
789
|
+
|
790
|
+
# Try to borrow an element from the preceding sibling. Nothing is borrowed
# unless the sibling would still hold more than min_keys keys afterwards.
# @param prev_node [BigTreeNode] the preceding sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_previous_sibling(prev_node)
  return false unless prev_node.keys.length - 1 > min_keys

  # Position of this node's branch key in the parent.
  branch_slot = @parent.search_node_index(self) - 1

  if is_leaf?
    # The last key of the previous node becomes the first key of this node
    # and therefore also the new branch key in the parent.
    @keys.unshift(prev_node.keys.pop)
    @parent.keys[branch_slot] = @keys.first
    # The matching value moves along with the key.
    @values.unshift(prev_node.values.pop)
  else
    # For branch nodes the old branch key moves down into this node and the
    # last key of the previous node takes its place in the parent.
    @keys.unshift(@parent.keys[branch_slot])
    @parent.keys[branch_slot] = prev_node.keys.pop
    # The last child of the previous node becomes the first child here.
    moved = prev_node.children.pop
    @children.unshift(moved)
    moved.parent = myself
  end

  true
end
|
820
|
+
|
821
|
+
# Try to borrow an element from the next sibling. Nothing is borrowed
# unless the sibling would still hold more than min_keys keys afterwards.
# @param next_node [BigTreeNode] the following sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_next_sibling(next_node)
  return false unless next_node.keys.length - 1 > min_keys

  # The next sibling gets a new lead key, so its branch key in its parent
  # has to be updated. Locate that branch key first.
  owner = next_node.parent
  branch_slot = owner.search_node_index(next_node) - 1

  if is_leaf?
    # The first key of the next node is appended to this node.
    self.keys << next_node.keys.shift
    # The next node's new lead key is registered with its parent.
    owner.keys[branch_slot] = next_node.keys.first
    # The matching value moves along with the key.
    self.values << next_node.values.shift
  else
    # For branch nodes the branch key from the parent of next_node moves
    # down into this node.
    self.keys << owner.keys[branch_slot]
    # The old lead key of next_node replaces it in the parent.
    owner.keys[branch_slot] = next_node.keys.shift
    # The first child of the next node becomes the last child here.
    moved = next_node.children.shift
    self.children << moved
    moved.parent = myself
  end

  true
end
|
854
|
+
|
855
|
+
# Replace the branch key old_key with the current first key of this node.
# The ancestors are searched from the parent upwards and only the first
# node that contains old_key is changed.
# @param old_key [Integer] the branch key to be replaced
# @return [nil]
def update_branch_key(old_key)
  replacement = @keys.first
  ancestor = @parent

  while ancestor
    slot = ancestor.keys.index(old_key)
    if slot
      ancestor.keys[slot] = replacement
      break
    end
    ancestor = ancestor.parent
  end

  # No ancestor holds old_key when this is the smallest element of the
  # tree; it has no branch key.
  nil
end
|
869
|
+
|
870
|
+
end
|
871
|
+
|
872
|
+
end
|
873
|
+
|