perobs 4.0.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,267 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArray.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/BigArrayNode'
31
+
32
+ module PEROBS
33
+
34
+ # The BigArray class implements an Array that stores the data in segments. It
35
+ # only loads the currently needed parts of the Array into memory. To provide
36
+ # an efficient access to the data by index a B+Tree like data structure is
37
+ # used. Each segment is stored in a leaf node of the B+Tree.
38
class BigArray < PEROBS::Object

  # Figures collected by #statistics.
  class Stats < Struct.new(:leaf_nodes, :branch_nodes, :min_depth,
                           :max_depth)
  end

  attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter

  # Internal constructor. Use Store.new() instead.
  # @param p [Handle]
  # @param node_size [Integer] The size of the tree nodes. This determines
  #        how many entries must be read/written for each operation. The
  #        default of 150 was empirically found to be a performance sweet
  #        spot. Smaller values will improve write operations. Larger
  #        values will improve read operations. 20 - 500 is a reasonable
  #        range to try. Must be an even number larger than 3.
  def initialize(p, node_size = 150)
    super(p)
    unless node_size > 3
      PEROBS.log.fatal "Node size (#{node_size}) must be larger than 3"
    end
    unless node_size % 2 == 0
      PEROBS.log.fatal "Node size (#{node_size}) must be an even number"
    end

    self.node_size = node_size
    clear
  end

  # Remove all entries from the BigArray. A fresh, empty leaf node becomes
  # root, first leaf and last leaf at the same time.
  def clear
    self.root = self.first_leaf = self.last_leaf =
      @store.new(BigArrayNode, myself, true)
    self.entry_counter = 0
  end

  # Store the value at the given index. If the index already exists the old
  # value will be overwritten. Writing past the end pads the gap with nil.
  # @param index [Integer] Position in the array. Negative values count
  #        from the end.
  # @param value [Object] value to store
  # @raise [IndexError] if a negative index points before the first element
  def []=(index, value)
    index = validate_index_range(index)

    @store.transaction do
      if index < @entry_counter
        # Overwrite of an existing element
        @root.set(index, value)
      elsif index == @entry_counter
        # Append right at the end
        @root.insert(index, value)
        self.entry_counter += 1
      else
        # Append with nil padding
        @entry_counter.upto(index - 1) do |i|
          @root.insert(i, nil)
        end
        @root.insert(index, value)
        self.entry_counter = index + 1
      end
    end
  end

  # Append the value to the end of the BigArray.
  # @param value [Object] value to append
  def <<(value)
    self[@entry_counter] = value
  end

  # Insert the value at the given index. Existing elements at or after that
  # index are moved up by one position. An index at or beyond the current
  # end behaves like #[]=.
  # @param index [Integer] Position in the array. Negative values count
  #        from the end.
  # @param value [Object] value to insert
  # @raise [IndexError] if a negative index points before the first element
  def insert(index, value)
    index = validate_index_range(index)

    if index < @entry_counter
      # Insert in between existing elements
      @store.transaction do
        @root.insert(index, value)
        self.entry_counter += 1
      end
    else
      self[index] = value
    end
  end

  # Return the value stored at the given index.
  # @param index [Integer] Position in the array. Negative values count
  #        from the end.
  # @return [Object or nil] found value or nil if the index is beyond the
  #         last element
  # @raise [IndexError] if a negative index points before the first element
  def [](index)
    index = validate_index_range(index)

    return nil if index >= @entry_counter

    @root.get(index)
  end

  # Check if there is an entry for the given key. The query is forwarded
  # unchanged to the root node.
  # @param key [Integer] Unique key
  # @return [Boolean] True if key is present, false otherwise.
  def has_key?(key)
    @root.has_key?(key)
  end

  # Delete the element at the specified index, returning that element, or
  # nil if the index is out of range.
  # @param index [Integer] Index in the BigArray. Negative values count
  #        from the end.
  # @return [Object] found value or nil
  def delete_at(index)
    if index < 0
      index = @entry_counter + index
    end

    return nil if index < 0 || index >= @entry_counter

    deleted_value = nil
    @store.transaction do
      deleted_value = @root.delete_at(index)
      self.entry_counter -= 1

      # Eliminate single entry nodes at the top. The new root is assigned
      # via the attribute setter like every other persistent attribute.
      while !@root.is_leaf? && @root.size == 1
        self.root = @root.children.first
        @root.parent = nil
      end
    end

    deleted_value
  end

  # Delete all entries for which the passed block yields true. The
  # implementation is optimized for large bulk deletes. It rebuilds a new
  # tree from the elements to keep. If only few elements are deleted the
  # overhead of rebuilding the tree is rather high.
  # @yield [index, value] original position and value of each element
  def delete_if
    # Keep a handle on the old chain of leaf nodes. clear() replaces root,
    # first and last leaf with a new empty node but leaves the old leaves
    # and their sibling links untouched.
    leaf = @first_leaf
    clear

    index = 0
    while leaf
      leaf.values.each do |value|
        self << value unless yield(index, value)
        index += 1
      end
      leaf = leaf.next_sibling
    end
  end

  # @return [Integer] The number of entries stored in the tree.
  def length
    @entry_counter
  end

  alias size length

  # Return true if the BigArray has no stored entries.
  def empty?
    @entry_counter == 0
  end

  # Iterate over all values in index order. The leaf nodes are visited via
  # their sibling links; iteration stops early when a leaf's each() returns
  # a false value.
  # @yield [value]
  def each(&block)
    node = @first_leaf
    while node
      break unless node.each(&block)
      node = node.next_sibling
    end
  end

  # Iterate over all values in reverse index order.
  # @yield [value]
  def reverse_each(&block)
    node = @last_leaf
    while node
      break unless node.reverse_each(&block)
      node = node.prev_sibling
    end
  end

  # Convert the BigArray into a Ruby Array. This is primarily intended for
  # debugging as real-world BigArray objects are likely too big to fit into
  # memory.
  # @return [Array]
  def to_a
    ary = []
    node = @first_leaf
    while node
      # concat appends in place instead of copying the result per leaf.
      ary.concat(node.values)
      node = node.next_sibling
    end

    ary
  end

  # @return [String] Human readable form of the tree. This is only intended
  # for debugging and should only be used with small BigArray objects.
  def to_s
    @root.to_s
  end

  # Check if the tree contains any errors.
  # @return [Boolean] true if no errors were found, false otherwise
  def check(&block)
    @root.check(&block)
  end

  # Gather some statistics regarding the tree structure.
  # @return [Stats] Structs with gathered data
  def statistics
    stats = Stats.new(0, 0, nil, nil)
    @root.statistics(stats)
    stats
  end

  private

  # Translate a possibly negative index into a non-negative one.
  # @param index [Integer] index as passed by the caller
  # @return [Integer] index counted from the start of the array
  # @raise [IndexError] if a negative index points before the first element
  def validate_index_range(index)
    if index < 0
      if -index > @entry_counter
        raise IndexError, "index #{index} too small for array; " +
                          "minimum #{-@entry_counter}"
      end

      index = @entry_counter + index
    end

    index
  end

end
265
+
266
+ end
267
+
@@ -0,0 +1,998 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArrayNode.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/Array'
31
+
32
+ module PEROBS
33
+
34
+ # The BigArrayNode class provides the BTree nodes for the BigArray objects.
35
+ # A node can either be a branch node or a leaf node. Branch nodes don't
36
+ # store values, only offsets and references to child nodes. Leaf nodes don't
37
+ # have child nodes but store the actual values. The leaf nodes always
38
+ # contain at least node_size / 2 number of consecutive values. The index of
39
+ # the first value in the BigArray is the sum of the offsets stored in the
40
+ # parent nodes. Branch nodes store the offsets and the corresponding
41
+ # child node references. The first offset is always 0. Consecutive offsets
42
+ # are set to the previous offset plus the total number of values stored in
43
+ # the previous child node. The leaf nodes don't contain holes. A
44
+ # concatenation of all leaf node values represents the stored Array.
45
+ #
46
+ # Root Node +--------------------------------+
47
+ # Offsets | 0 11 |
48
+ # Children | |
49
+ # v v
50
+ # Level 1 +--------------------------++--------------------------+
51
+ # Offsets | 0 4 7 || 0 2 5 |
52
+ # Children | | | | | |
53
+ # v v v v v v
54
+ # Leaves +---------++-------++----------++-------++----------++-------+
55
+ # Values | A B C D || E F G || H I J K || L M || N O P || Q R |
56
+ #
57
+ # Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
58
+ #
59
+ class BigArrayNode < PEROBS::Object
60
+
61
+ attr_persist :tree, :parent, :offsets, :values, :children,
62
+ :prev_sibling, :next_sibling
63
+
64
# Internal constructor. Use Store.new(BigArrayNode, ...) instead.
# @param p [Handle]
# @param tree [BigArray] The tree this node should belong to
# @param is_leaf [Boolean] True if a leaf node should be created, false
#        for a branch node.
# @param parent [BigArrayNode] Parent node
# @param prev_sibling [BigArrayNode] Previous sibling (leaf nodes only)
# @param next_sibling [BigArrayNode] Next sibling (leaf nodes only)
def initialize(p, tree, is_leaf, parent = nil,
               prev_sibling = nil, next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent

  unless is_leaf
    # Branch nodes hold offsets and child references, never values, and
    # they are not part of the sibling chain.
    self.offsets = @store.new(PEROBS::Array)
    self.children = @store.new(PEROBS::Array)
    self.values = nil
    self.prev_sibling = self.next_sibling = nil
    return
  end

  # Leaf nodes hold values and have neither offsets nor children.
  self.values = @store.new(PEROBS::Array)
  self.children = self.offsets = nil

  # Hook the new leaf into the doubly linked chain of leaves. A missing
  # neighbor means this node is the new first or last leaf of the tree.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  else
    @tree.first_leaf = myself
  end

  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  else
    @tree.last_leaf = myself
  end
end
106
+
107
# A node is a leaf exactly when it has no children list.
# @return [Boolean] True if this is a leaf node, false otherwise.
def is_leaf?
  @children.nil?
end
111
+
112
# @return [Integer] number of values (leaf node) or number of children
#         (branch node) held directly by this node.
def size
  (is_leaf? ? @values : @children).size
end
115
+
116
# Count the values stored below this node by following the last child on
# every level and summing up the last offsets on the way down.
# @return [Integer] the number of values stored in this node.
def values_count
  total = 0
  current = self
  while current
    return total + current.values.size if current.is_leaf?

    total += current.offsets.last
    current = current.children.last
  end
end
129
+
130
+
131
# Overwrite the value at the given index. The index must refer to an
# already existing element below this node.
# @param index [Integer] Position of the value, relative to this node
# @param value [Object] value to store
def set(index, value)
  node = self

  # Walk down the tree until we reach the leaf that holds the index.
  while node
    if node.is_leaf?
      if index >= node.values.size
        node.fatal "Set index (#{index}) larger than values array " +
                   "(#{node.values.size})."
      end
      node.values[index] = value
      return
    end

    # Pick the child that contains the index: the last child whose offset
    # is not larger than the index. This is the same lookup get() uses.
    # The insert-oriented search_child_index() would select the preceding
    # child when the index equals a child's offset.
    upper = node.offsets.bsearch_index { |o| o > index }
    cidx = (upper || node.offsets.length) - 1
    index -= node.offsets[cidx]
    node = node.children[cidx]
  end

  # node is nil here, so the error must be reported via this node.
  fatal "Could not find proper node to set the value while " +
        "looking for index #{index}"
end
158
+
159
# Insert the given value at the given index. All following values will be
# pushed to a higher index.
# @param index [Integer] Position to insert at, relative to this node
# @param value [Object] value to insert
def insert(index, value)
  node = self
  cidx = nil

  # Walk down the tree to the leaf that will receive the value.
  while node
    # All nodes that we find on the way that are full will be split into
    # two half-full nodes.
    if node.size >= @tree.node_size
      # split_node returns the parent, so the index must become relative
      # to the parent again before we descend into one of the halves.
      index += node.parent.offsets[cidx] if node.parent
      node = node.split_node
    end

    if node.is_leaf?
      node.values.insert(index, value)
      # All offsets to the right of this leaf grow by one.
      node.parent.adjust_offsets(node, 1) if node.parent
      return
    end

    # Descend into the child the value belongs to.
    cidx = node.search_child_index(index)
    if (index -= node.offsets[cidx]) < 0
      node.fatal "Index (#{index}) became negative"
    end
    node = node.children[cidx]
  end

  # node is nil here, so the error must be reported via this node.
  fatal "Could not find proper node to insert the value while " +
        "looking for index #{index}"
end
196
+
197
# Return the value stored at the given index.
# @param index [Integer] Position of the value, relative to this node
# @return [Object or nil] value found at the index
def get(index)
  current = self

  # Walk down the tree until we reach the leaf that holds the index.
  while current
    return current.values[index] if current.is_leaf?

    # Select the last child whose offset is not larger than the index and
    # make the index relative to that child.
    upper = current.offsets.bsearch_index { |o| o > index }
    cidx = (upper || current.offsets.length) - 1
    index -= current.offsets[cidx]
    current.fatal "Index (#{index}) became negative" if index < 0
    current = current.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for index #{index}"
end
223
+
224
# Delete the element at the specified index and return it.
# @param index [Integer] Position of the value, relative to this node
# @return [Object] the removed value
def delete_at(index)
  current = self

  while current
    if current.is_leaf?
      removed = current.values.delete_at(index)
      if (owner = current.parent)
        # All offsets to the right of this leaf shrink by one.
        owner.adjust_offsets(current, -1)
        # Leaves that became too small are merged or refilled.
        owner.consolidate_child_nodes(current) if current.size < min_size
      end

      return removed
    end

    # Select the last child whose offset is not larger than the index and
    # make the index relative to that child.
    upper = current.offsets.bsearch_index { |o| o > index }
    cidx = (upper || current.offsets.length) - 1
    index -= current.offsets[cidx]
    current.fatal "Index (#{index}) became negative" if index < 0
    current = current.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to delete from while " +
                   "looking for index #{index}"
end
257
+
258
# Iterate over the values of a leaf node in ascending order. Branch nodes
# hold no values; for them nil is returned without calling the block.
# @yield [value]
def each
  return nil unless is_leaf?

  @values.each { |v| yield(v) }
end
267
+
268
# Iterate over the values of a leaf node in descending order. Branch nodes
# hold no values; for them nil is returned without calling the block.
# @yield [value]
def reverse_each
  return nil unless is_leaf?

  @values.reverse_each { |v| yield(v) }
end
277
+
278
# Check consistency of the node and all subsequent nodes. In case an error
# is found, a message is logged and false is returned.
# @yield [index, value] called for every stored value if a block is
#        given; a false result from the block aborts the check
# @return [Boolean] true if tree has no errors
def check
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # Nodes should have between min_size() and @tree.node_size children
      # or values. Only the root node may have less.
      if node.size > @tree.node_size
        node.error "BigArray node #{node._id} is too large. It has " +
                   "#{node.size} nodes instead of max. #{@tree.node_size}."
        return false
      end
      if node.parent && node.size < min_size
        node.error "BigArray node #{node._id} is too small"
        return false
      end

      if node.is_leaf?
        # All leaf nodes must have same distance from root node.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end

        return false unless node.check_leaf_node_links

        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        unless node.children.size == node.offsets.size
          node.error "Offset count (#{node.offsets.size}) must be equal " +
                     "to children count (#{node.children.size})"
          return false
        end

        if node.values
          node.error "values must be nil for a branch node"
          return false
        end

        # Inspect the links of the node being visited, not those of the
        # node check() was called on, and report the failure to the caller.
        unless node.prev_sibling.nil? && node.next_sibling.nil?
          node.error "prev_sibling and next_sibling must be nil for " +
                     "branch nodes"
          return false
        end

        return false unless node.check_offsets

        return false unless node.check_child_nodes(stack)
      end
    elsif position <= node.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf? && block_given?
        # Hand the array index and the value to the caller's block.
        return false unless yield(node.first_index + index,
                                  node.values[index])
      end
    end
  end

  true
end
354
+
355
# Verify the sibling links of this leaf node. A node without previous
# (next) sibling must be registered as first (last) leaf of the tree; an
# existing sibling must point back to this node.
# @return [Boolean] true if the links are consistent, false otherwise
def check_leaf_node_links
  if @prev_sibling.nil?
    unless @tree.first_leaf == self
      error "Leaf node #{@_id} has no previous sibling " +
            "but is not the first leaf of the tree"
      return false
    end
  else
    unless @prev_sibling.next_sibling == self
      error "next_sibling of previous sibling does not point to " +
            "this node"
      return false
    end
  end

  if @next_sibling.nil?
    unless @tree.last_leaf == self
      error "Leaf node #{@_id} has no next sibling " +
            "but is not the last leaf of the tree"
      return false
    end
  else
    unless @next_sibling.prev_sibling == self
      error "previous_sibling of next sibling does not point to " +
            "this node"
      return false
    end
  end

  true
end
382
+
383
# Verify the offsets of this branch node. The first offset must be 0 and
# every following offset must equal the previous offset plus the number of
# values stored below the previous child. An empty root node is accepted.
# @return [Boolean] true if the offsets are consistent, false otherwise
def check_offsets
  return true if @parent.nil? && @offsets.empty?

  unless @offsets[0] == 0
    error "First offset is not 0: #{@offsets.inspect}"
    return false
  end

  previous = nil
  @offsets.each_with_index do |current, pos|
    unless pos == 0
      if current < previous
        error "Offset are not strictly monotoneously " +
              "increasing: #{@offsets.inspect}"
        return false
      end

      expected = previous + @children[pos - 1].values_count
      unless current == expected
        error "Offset #{pos} must be #{expected} " +
              "but is #{current}."
        return false
      end
    end

    previous = current
  end

  true
end
412
+
413
# Verify the child references of this branch node: no duplicates, every
# child is a BigArrayNode whose parent is this node, and no child refers
# back to this node or to one of its ancestors.
# @param stack [Array] nodes on the path from the root to this node
# @return [Boolean] true if all children are consistent, false otherwise
def check_child_nodes(stack)
  if @children.uniq.size != @children.size
    error "Node #{@_id} has multiple identical children"
    return false
  end

  @children.each_with_index do |child, i|
    unless child.is_a?(BigArrayNode)
      error "Child #{i} is of class #{child.class} " +
            "instead of BigArrayNode"
      return false
    end

    unless child.parent.is_a?(BigArrayNode)
      # Report the class of the offending parent reference, not the class
      # of the child itself.
      error "Parent reference of child #{i} is of class " +
            "#{child.parent.class} instead of BigArrayNode"
      return false
    end

    if child.parent != self
      error "Child node #{child._id} has wrong parent " +
            "#{child.parent._id}. It should be #{@_id}."
      return false
    end

    if child == self
      error "Child #{i} point to self"
      return false
    end

    if stack.include?(child)
      error "Child #{i} points to ancester node"
      return false
    end
  end

  true
end
457
+
458
# Render the sub-tree below this node as text, one line per node and one
# line per non-nil leaf value. Exceptions raised while rendering a line
# are caught and shown in place of that line.
# @return [String] Human readable form of the sub-tree.
def to_s
  str = ''

  traverse do |node, position, _stack|
    begin
      if position == 0
        # Header line of the node.
        lead = node.parent ? node.parent.tree_prefix + ' +' : 'o'
        mark = node.tree_branch_mark
        fill = node.size == 0 ? '--' : 'v-'
        str += "#{lead}" +
               "#{mark}-" +
               "#{fill}#{node.tree_summary}\n"
      elsif node.is_leaf? && node.values[position - 1]
        # One line per stored value with its array index.
        str += "#{node.tree_prefix} " +
               "#{position == node.size ? '-' : '|'} " +
               "[ #{node.value_index(position - 1)}: " +
               "#{node.values[position - 1]} ]\n"
      end
    rescue => e
      str += "@@@@@@@@@@: #{e.message}\n"
    end
  end

  str
end
489
+
490
# Split the current node into two nodes. The upper half of the elements
# will be moved into a newly created sibling node. This node will retain
# the lower half. If this node is the root, a new root is created first.
# @return [BigArrayNode] common parent of the two nodes
def split_node
  unless @parent
    # The node is the root node. We need to create a parent node first and
    # register it as new root of the tree.
    self.parent = @store.new(BigArrayNode, @tree, false)
    @parent.offsets[0] = 0
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The new sibling is placed directly behind this node.
  sibling = @store.new(BigArrayNode, @tree, is_leaf?, @parent, myself,
                       @next_sibling)
  # Everything from the middle position onwards moves to the sibling.
  half = size / 2
  upper = half..-1

  if is_leaf?
    # Example: a leaf [ A B C D ] at parent offset 0 becomes [ A B ] at
    # offset 0 and [ C D ] at offset 2.
    @parent.insert_child_after_peer(half, sibling, self)
    sibling.values += @values[upper]
    @values.slice!(upper)
  else
    # The sibling's offsets are relative to its own first child, so the
    # offset of the middle child is subtracted from each moved offset and
    # used as the distance of the sibling from this node in the parent.
    offset_delta = @offsets[half]
    @parent.insert_child_after_peer(offset_delta, sibling, self)
    sibling.offsets += @offsets[upper].map { |o| o - offset_delta }
    @offsets.slice!(upper)
    sibling.children += @children[upper]
    # The moved children now belong to the sibling.
    sibling.children.each { |kid| kid.parent = sibling }
    @children.slice!(upper)
  end

  @parent
end
574
+
575
# Register a new child node directly behind the given peer node.
# @param offset [Integer] distance of the new child's offset from the
#        peer's offset
# @param node [BigArrayNode] the child node to insert
# @param peer [BigArrayNode] existing child that precedes the new node.
#        If it is nil or not a child of this node, the new node becomes
#        the first child and offset is used as is.
def insert_child_after_peer(offset, node, peer = nil)
  peer_index = @children.find_index(peer)
  if peer_index
    cidx = peer_index + 1
    base = @offsets[peer_index]
  else
    # No peer to anchor to; @offsets[nil] would raise a TypeError.
    cidx = 0
    base = 0
  end
  @offsets.insert(cidx, base + offset)
  @children.insert(cidx, node)
end
581
+
582
# Restore the minimum fill level of a child that has become too small.
# The child is merged with a neighbor if both fit into one node,
# otherwise one element is moved over from the neighbor. The first child
# uses its successor, all other children use their predecessor.
# @param child [BigArrayNode] the under-filled child node
def consolidate_child_nodes(child)
  unless (child_index = @children.index(child))
    # Without a valid index nothing below can work, so this must abort
    # like the same situation does in adjust_offsets().
    fatal "Cannot find child to consolidate"
  end

  if child_index == 0
    # Consolidate with successor if it exists.
    return unless (succ = @children[child_index + 1])

    if child.size + succ.size <= @tree.node_size
      # merge child with successor
      merge_child_with_next(child_index)
    else
      move_first_element_of_successor_to_child(child_index)
    end
  else
    # consolidate with predecessor
    pred = @children[child_index - 1]

    if pred.size + child.size <= @tree.node_size
      # merge child with predecessor
      merge_child_with_next(child_index - 1)
    else
      move_last_element_of_predecessor_to_child(child_index)
    end
  end
end
609
+
610
# Find the child to descend into when inserting at the given offset. An
# offset equal to a child's own offset selects the preceding child.
# @param offset [Integer] offset to search the child index for
# @return [Integer] Index of the child to descend into.
def search_child_index(offset)
  # Covers the empty offsets list and everything up to the first offset.
  return 0 if @offsets.empty? || offset <= @offsets.first

  hit = @offsets.bsearch_index { |o| o >= offset }
  (hit || @offsets.length) - 1
end
618
+
619
# @return [Integer or nil] The position of this node in the children list
#         of its parent node. If the node is the root node, nil is
#         returned.
def index_in_parent_node
  @parent ? @parent.children.find_index(self) : nil
end
626
+
627
# Compute the array index of the first value stored below this node. It
# is the sum of the offsets that the ancestors hold for the nodes on the
# path from the root down to this node.
# @return [Integer] array index of the node's first value
def first_index
  # TODO: This is a very expensive method since it searches the children
  # list of every ancestor. Find a way to make this way faster.
  total = 0
  child = myself
  node = parent
  while node
    # Every level contributes the offset of the child we came from.
    if (idx = node.children.index(child))
      total += node.offsets[idx]
    end
    child = node
    node = node.parent
  end

  total
end
642
+
643
# Translate a position inside this node into the index of that value in
# the whole BigArray by adding up the offsets the ancestors hold for the
# nodes on the path to the root.
# @param idx [Integer] Index of the value in the current node
# @return [Integer] Array index of the value
def value_index(idx)
  child = self
  while (ancestor = child.parent)
    idx += ancestor.offsets[child.index_in_parent_node]
    child = ancestor
  end

  idx
end
656
+
657
# Adjust the offsets in the tree after elements were inserted or removed.
# Starting with this node and continuing with every ancestor, the offsets
# of all children that follow the modified child are changed by delta.
# The offset of the modified child itself stays untouched.
# @param after_child [BigArrayNode] specifies the modified leaf node
# @param delta [Integer] specifies how many elements were inserted or
#        removed.
def adjust_offsets(after_child, delta)
  current = self

  while current
    last = current.children.size - 1
    # Locate the modified child in the children list.
    pos = (0..last).find { |i| current.children[i] == after_child }

    if pos
      # Shift the offsets of all children behind it.
      (pos + 1).upto(last) { |i| current.offsets[i] += delta }
    else
      current.fatal "Could not find child #{after_child._id}"
    end

    # Continue one level up; there the node just handled is the child
    # whose followers must be shifted.
    after_child = current
    current = current.parent
  end
end
689
+
690
# Generic, non-recursive tree iterator. For every node it yields once before
# descending into each child and once more after the child has been
# processed (which is identical to before the next child descend). The
# block receives the node, the position within the node and the stack of
# pending [node, position] entries.
# @yield [node, position, stack]
def traverse
  # Work list of [node, position] pairs that still have to be visited.
  pending = [[self, 0]]

  until pending.empty?
    current, pos = pending.pop

    # Call the payload. Position 0 means the node has just been entered and
    # we are about to descend to the first child. Position 1 is after the
    # 1st child has been processed and before the 2nd child is being
    # processed, and so on.
    yield(current, pos, pending)

    # Positions are visited as long as they do not exceed the node size.
    next if pos > current.size

    # Schedule the next position of this node ...
    pending.push([current, pos + 1])
    next if current.is_leaf?

    # ... and, if there is a child linked at this position, visit the child
    # first, starting at its position 0.
    child = current.children[pos]
    pending.push([child, 0]) if child
  end
end
724
+
725
# Gather some statistics about the node and all sub nodes. Every node is
# counted once (when the traversal enters it at position 0). For leaf nodes
# the depth is recorded as well; it is derived from the number of pending
# entries on the traversal stack.
# @param stats [Stats] Data structure that stores the gathered data. It must
#        provide the accessors leaf_nodes, branch_nodes, min_depth and
#        max_depth. min_depth and max_depth may be nil initially.
def statistics(stats)
  traverse do |node, position, stack|
    # Only count a node when it is entered for the first time.
    next unless position == 0

    if node.is_leaf?
      stats.leaf_nodes += 1
      depth = stack.size + 1
      # Keep the smallest depth seen so far in min_depth ...
      if stats.min_depth.nil? || depth < stats.min_depth
        stats.min_depth = depth
      end
      # ... and the largest one in max_depth.
      if stats.max_depth.nil? || depth > stats.max_depth
        stats.max_depth = depth
      end
    else
      stats.branch_nodes += 1
    end
  end
end
745
+
746
# Build the decoration that marks the tree structure of this node for the
# inspection method. One segment is generated per ancestor level; the
# segment differs depending on whether the node on that level is the last
# child of its parent. The root level does not contribute a segment.
# @return [String] the concatenated segments, outermost level first
def tree_prefix
  prefix = ''
  current = self

  # Don't add lines for the top-level, so stop once a node without parent
  # has been reached.
  while current && current.parent
    last_child = current.parent.children.last == current
    prefix = (last_child ? ' ' : ' |') + prefix
    current = current.parent
  end

  prefix
end
767
+
768
# Branch node decoration for the inspection method.
# @return [String] '-' for nodes that have a parent, an empty String
#         otherwise
def tree_branch_mark
  @parent ? '-' : ''
end
773
+
774
# Text for the node line for the inspection method. The line consists of
# the node ID followed by optional sections for the parent (offset within
# the parent and parent ID), the previous sibling and the next sibling. If
# the data for a section cannot be determined because an error is raised,
# a placeholder ending in '@' is used for that section instead.
# @return [String] the summary text
def tree_summary
  sections = [" @#{@_id}"]

  if @parent
    sections << begin
      " +#{@parent.offsets[index_in_parent_node]} ^#{@parent._id}"
    rescue
      ' ^@'
    end
  end

  if @prev_sibling
    sections << begin
      " <#{@prev_sibling._id}"
    rescue
      ' <@'
    end
  end

  if @next_sibling
    sections << begin
      " >#{@next_sibling._id}"
    rescue
      ' >@'
    end
  end

  sections.join
end
801
+
802
# Print and log an error message for the node. The message is prefixed with
# the node ID and followed by the text representation of the tree. It is
# written to $stderr and passed to the PEROBS logger with error severity.
# @param msg [String] description of the problem
def error(msg)
  report = "Error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts(report)
  PEROBS.log.error(report)
end
808
+
809
# Print and log a fatal error message for the node. The message is prefixed
# with the node ID and followed by the text representation of the tree. It
# is written to $stderr and passed to the PEROBS logger with fatal
# severity.
# @param msg [String] description of the problem
def fatal(msg)
  report = "Fatal error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts(report)
  PEROBS.log.fatal(report)
end
815
+
816
+ private
817
+
818
# Threshold below which a node is considered too small (see the
# consolidation check after merging children).
# @return [Integer] half of the node size configured for the tree
def min_size
  capacity = @tree.node_size
  capacity / 2
end
821
+
822
# Move first element of successor to end of child node. For leaf nodes the
# element is a value, for branch nodes it is the first child reference of
# the successor together with its offset. The offsets of this node and of
# the two affected children are updated accordingly.
# @param child_index [Integer] index of the child
def move_first_element_of_successor_to_child(child_index)
  child = @children[child_index]
  succ = @children[child_index + 1]

  if child.is_leaf?
    # Adjust offset for the successor node. It now starts one value later.
    @offsets[child_index + 1] += 1
    # Move the value
    child.values << succ.values.shift
  else
    # Before:
    #
    # Root Node  +------------------------------------------------+
    # Offsets    | 0                   7                          |
    # Children     |                   |
    #      child   v              succ v
    # Level 1    +------------------++----------------------------------------+
    # Offsets    | 0          4     || 0           4        6           9     |
    # Children     |          |        |           |        |           |
    #              v          v        v           v        v           v
    # Leaves     +---------++-------++----------++-------++----------++-------+
    # Values     | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index        0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # After:
    #
    # Root Node  +------------------------------------------------+
    # Offsets    | 0                               11             |
    # Children     |                               |
    #      child   v                          succ v
    # Level 1    +------------------------------++----------------------------+
    # Offsets    | 0          4        7        || 0        2           5     |
    # Children     |          |        |           |        |           |
    #              v          v        v           v        v           v
    # Leaves     +---------++-------++----------++-------++----------++-------+
    # Values     | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index        0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # Adjust the offsets of the successor. The 2nd original offset
    # determines the delta for the parent node.
    succ.offsets.shift
    delta = succ.offsets.first
    succ.offsets.map! { |o| o - delta }
    # The additional child offset is the current size of the child, i. e.
    # the distance between the start of the successor and the start of the
    # child in this node. Offsets inside the child are relative to the
    # child, so the start of the child must be subtracted. For the first
    # child, whose start is 0 in the example above, this makes no
    # difference.
    child.offsets << (@offsets[child_index + 1] - @offsets[child_index])
    # The parent node offset of the successor needs to be corrected by the
    # delta value.
    @offsets[child_index + 1] += delta
    # Move the child reference
    child.children << succ.children.shift
    child.children.last.parent = child
  end
end
880
+
881
# Move last element of predecessor node to child. For leaf nodes the
# element is a value, for branch nodes it is the last child reference of
# the predecessor together with its offset. The offsets of this node and of
# the two affected children are updated accordingly.
# @param child_index [Integer] index of the child
def move_last_element_of_predecessor_to_child(child_index)
  pred = @children[child_index - 1]
  child = @children[child_index]

  if child.is_leaf?
    # Adjust offset for the child node. It now starts one value earlier.
    @offsets[child_index] -= 1
    # Move the value
    child.values.unshift(pred.values.pop)
  else
    # Before:
    #
    # Root Node  +------------------------------------------------+
    # Offsets    | 0                                        13    |
    # Children     |                                        |
    #       pred   v                                  child v
    # Level 1    +---------------------------------------++-------------------+
    # Offsets    | 0          4        7           11    || 0           3     |
    # Children     |          |        |           |        |           |
    #              v          v        v           v        v           v
    # Leaves     +---------++-------++----------++-------++----------++-------+
    # Values     | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index        0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # After:
    #
    # Root Node  +------------------------------------------------+
    # Offsets    | 0                               11             |
    # Children     |                               |
    #       pred   v                         child v
    # Level 1    +------------------------------++----------------------------+
    # Offsets    | 0          4        7        || 0        2           5     |
    # Children     |          |        |           |        |           |
    #              v          v        v           v        v           v
    # Leaves     +---------++-------++----------++-------++----------++-------+
    # Values     | A B C D || E F G || H I J K  || L  M  || N  O  P  || Q  R  |
    #
    # Index        0 1 2 3    4 5 6    7 8 9 10    11 12    13 14 15    16 17
    #
    # Remove the last predecessor offset. It is relative to the start of
    # the predecessor, so the start of the predecessor within this node
    # must be added to get the new start of the child. In the example above
    # the predecessor starts at 0, but that is only true for the first
    # child of a node.
    new_child_start = @offsets[child_index - 1] + pred.offsets.pop
    # The size of the moved sub-tree is the distance between the old and
    # the new start of the child.
    delta = @offsets[child_index] - new_child_start
    @offsets[child_index] = new_child_start
    # Adjust all the offsets of the child
    child.offsets.map! { |o| o + delta }
    # And prepend the 0 offset
    child.offsets.unshift(0)
    # Move the child reference
    child.children.unshift(pred.children.pop)
    child.children.first.parent = child
  end
end
936
+
937
# Merge the child at the given index with the child that follows it. The
# content of the following child is appended to the first one and the
# following child is removed from this node. If this node has a parent and
# ends up smaller than min_size, the parent is asked to consolidate it.
# @param child_index [Integer] index of the first of the two children
def merge_child_with_next(child_index)
  left = @children[child_index]
  right = @children[child_index + 1]

  if left.is_leaf?
    # Update the sibling links
    left.next_sibling = right.next_sibling
    left.next_sibling.prev_sibling = left if left.next_sibling

    left.values += right.values
    # Adjust the last_leaf reference in the @tree if the merged node is now
    # the last sibling.
    @tree.last_leaf = left unless left.next_sibling
  else
    # Before:
    #
    # Root Node  +---------------------------------------+
    # Offsets    | 0                               11    |
    # Children     |                               |
    #         c1   v                            c2 v
    # Level 1    +------------------------------++-------+
    # Offsets    | 0          4        7        || 0     |
    # Children     |          |        |           |
    #              v          v        v           v
    # Leaves     +---------++-------++----------++-------+
    # Values     | A B C D || E F G || H I J K  || L  M  |
    #
    # Index        0 1 2 3    4 5 6    7 8 9 10    11 12
    #
    # After:
    #
    # Root Node  +---+
    # Offsets    | 0 |
    # Children     |
    #         c1   v
    # Level 1    +---------------------------------------+
    # Offsets    | 0          4        7           11    |
    # Children     |          |        |           |
    #              v          v        v           v
    # Leaves     +---------++-------++----------++-------+
    # Values     | A B C D || E F G || H I J K  || L  M  |
    #
    # Index        0 1 2 3    4 5 6    7 8 9 10    11 12
    #
    # The offsets of the second node are relative to its own start. Shift
    # them by the distance between the two nodes before appending them.
    shift = @offsets[child_index + 1] - @offsets[child_index]
    left.offsets += right.offsets.map { |o| o + shift }
    right.children.each { |grandchild| grandchild.parent = left }
    left.children += right.children
  end

  # Remove the child successor from the node.
  @offsets.delete_at(child_index + 1)
  @children.delete_at(child_index + 1)

  # This node has lost a child. Let the parent rebalance it if it has
  # become too small.
  @parent.consolidate_child_nodes(self) if @parent && size < min_size
end
994
+
995
+ end
996
+
997
+ end
998
+