perobs 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,267 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArray.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/BigArrayNode'
31
+
32
+ module PEROBS
33
+
34
# The BigArray class implements an Array that stores its data in segments
# and only loads the currently needed segments into memory. To provide
# efficient access by index a B+Tree like structure of BigArrayNode objects
# is used. Each segment is stored in a leaf node of that tree.
class BigArray < PEROBS::Object

  # Counters that are filled in by BigArray#statistics.
  class Stats < Struct.new(:leaf_nodes, :branch_nodes, :min_depth,
                           :max_depth)
  end

  attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter

  # Internal constructor. Use Store.new() instead.
  # @param p [Handle]
  # @param node_size [Integer] The size of the tree nodes. This determines
  #        how many entries must be read/written for each operation. It
  #        must be an even number larger than 3. The default of 150 was
  #        empirically found to be a performance sweet spot. Smaller values
  #        will improve write operations. Larger values will improve read
  #        operations. 20 - 500 is a reasonable range to try.
  def initialize(p, node_size = 150)
    super(p)
    unless node_size > 3
      PEROBS.log.fatal "Node size (#{node_size}) must be larger than 3"
    end
    unless node_size % 2 == 0
      PEROBS.log.fatal "Node size (#{node_size}) must be an even number"
    end

    self.node_size = node_size
    clear
  end

  # Remove all entries from the BigArray. The tree is replaced by a single
  # empty leaf node that is root, first leaf and last leaf at the same time.
  def clear
    self.root = self.first_leaf = self.last_leaf =
      @store.new(BigArrayNode, myself, true)
    self.entry_counter = 0
  end

  # Store the value at the given index. If the index already exists the old
  # value will be overwritten. If the index is beyond the current end, the
  # gap is filled with nil entries.
  # @param index [Integer] Position in the array. Negative values count
  #        from the end.
  # @param value [Object] value to store
  # @raise [IndexError] if a negative index points before the first entry
  def []=(index, value)
    index = validate_index_range(index)

    @store.transaction do
      if index < @entry_counter
        # Overwrite of an existing element
        @root.set(index, value)
      elsif index == @entry_counter
        # Append right at the end
        @root.insert(index, value)
        self.entry_counter += 1
      else
        # Append with nil padding
        @entry_counter.upto(index - 1) do |i|
          @root.insert(i, nil)
        end
        @root.insert(index, value)
        self.entry_counter = index + 1
      end
    end
  end

  # Append the value to the end of the BigArray.
  # @param value [Object] value to append
  def <<(value)
    self[@entry_counter] = value
  end

  # Insert the value at the given index. All entries at and after that
  # index are pushed to the next higher index. An index at or beyond the
  # current end is handled like []=, i. e. the gap is padded with nil.
  # @param index [Integer] Position in the array. Negative values count
  #        from the end.
  # @param value [Object] value to insert
  # @raise [IndexError] if a negative index points before the first entry
  def insert(index, value)
    index = validate_index_range(index)

    if index < @entry_counter
      # Insert in between existing elements
      @store.transaction do
        @root.insert(index, value)
        self.entry_counter += 1
      end
    else
      self[index] = value
    end
  end

  # Return the value stored at the given index.
  # @param index [Integer] Position in the array. Negative values count
  #        from the end.
  # @return [Object or nil] found value or nil if the index is beyond the
  #         last entry
  # @raise [IndexError] if a negative index points before the first entry
  def [](index)
    index = validate_index_range(index)

    return nil if index >= @entry_counter

    @root.get(index)
  end

  # Check if there is an entry for the given key. The query is forwarded
  # unchanged to the root node.
  # @param key [Integer] Unique key
  # @return [Boolean] True if key is present, false otherwise.
  def has_key?(key)
    @root.has_key?(key)
  end

  # Delete the element at the specified index, returning that element, or
  # nil if the index is out of range.
  # @param index [Integer] Index in the BigArray. Negative values count
  #        from the end.
  # @return [Object] found value or nil
  def delete_at(index)
    if index < 0
      index = @entry_counter + index
    end

    return nil if index < 0 || index >= @entry_counter

    deleted_value = nil
    @store.transaction do
      deleted_value = @root.delete_at(index)
      self.entry_counter -= 1

      # Eliminate single entry nodes at the top. The root is replaced via
      # the attribute setter, like every other change of a persistent
      # attribute in this class.
      while !@root.is_leaf? && @root.size == 1
        self.root = @root.children.first
        @root.parent = nil
      end
    end

    deleted_value
  end

  # Delete all entries for which the passed block yields true. The
  # implementation is optimized for large bulk deletes. It rebuilds a new
  # tree from the elements to keep. If only few elements are deleted the
  # overhead of rebuilding the tree is rather high.
  # @yield [index, value] index of the entry before any deletion and the
  #        stored value
  # @return [BigArray] the BigArray itself
  def delete_if
    # Keep a handle on the old chain of leaf nodes. clear() creates a new
    # root leaf and does not touch the old nodes.
    node = @first_leaf
    clear

    index = 0
    while node
      node.values.each do |value|
        self << value unless yield(index, value)
        index += 1
      end
      node = node.next_sibling
    end

    myself
  end

  # @return [Integer] The number of entries stored in the tree.
  def length
    @entry_counter
  end

  alias size length

  # Return true if the BigArray has no stored entries.
  def empty?
    @entry_counter == 0
  end

  # Iterate over all entries in index order by walking the chain of leaf
  # nodes. The walk stops early when a leaf's each() returns a false value.
  # @yield [value]
  def each(&block)
    node = @first_leaf
    while node
      break unless node.each(&block)
      node = node.next_sibling
    end
  end

  # Iterate over all entries in reverse index order by walking the chain of
  # leaf nodes backwards.
  # @yield [value]
  def reverse_each(&block)
    node = @last_leaf
    while node
      break unless node.reverse_each(&block)
      node = node.prev_sibling
    end
  end

  # Convert the BigArray into a Ruby Array. This is primarily intended for
  # debugging as real-world BigArray objects are likely too big to fit into
  # memory.
  # @return [Array] all values in index order
  def to_a
    ary = []
    node = @first_leaf
    while node do
      ary += node.values
      node = node.next_sibling
    end

    ary
  end

  # @return [String] Human readable form of the tree. This is only intended
  #         for debugging and should only be used with small BigArray
  #         objects.
  def to_s
    @root.to_s
  end

  # Check if the tree contains any errors. An optional block is passed on
  # to the check of the root node.
  # @return [Boolean] true if no errors were found, false otherwise
  def check(&block)
    @root.check(&block)
  end

  # Gather some statistics regarding the tree structure.
  # @return [Stats] Struct with the gathered data
  def statistics
    stats = Stats.new(0, 0, nil, nil)
    @root.statistics(stats)
    stats
  end

  private

  # Translate a negative index into the corresponding non-negative index.
  # Non-negative indexes are returned unchanged.
  # @param index [Integer] index as passed by the caller
  # @return [Integer] non-negative index
  # @raise [IndexError] if a negative index points before the first entry
  def validate_index_range(index)
    if index < 0
      if -index > @entry_counter
        raise IndexError, "index #{index} too small for array; " +
          "minimum #{-@entry_counter}"
      end

      index = @entry_counter + index
    end

    index
  end

end
265
+
266
+ end
267
+
@@ -0,0 +1,998 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArrayNode.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/Array'
31
+
32
+ module PEROBS
33
+
34
+ # The BigArrayNode class provides the BTree nodes for the BigArray objects.
35
+ # A node can either be a branch node or a leaf node. Branch nodes don't
36
+ # store values, only offsets and references to child nodes. Leaf nodes don't
37
+ # have child nodes but store the actual values. The leaf nodes always
38
+ # contain at least node_size / 2 number of consecutive values. The index of
39
+ # the first value in the BigArray is the sum of the offsets stored in the
40
+ # parent nodes. Branch nodes store the offsets and the corresponding
41
+ # child node references. The first offset is always 0. Consecutive offsets
42
+ # are set to the previous offset plus the total number of values stored in
43
+ # the previous child node. The leaf nodes don't contain holes. A
44
+ # concatenation of all leaf node values represents the stored Array.
45
+ #
46
+ # Root Node +--------------------------------+
47
+ # Offsets | 0 11 |
48
+ # Children | |
49
+ # v v
50
+ # Level 1 +--------------------------++--------------------------+
51
+ # Offsets | 0 4 7 || 0 2 5 |
52
+ # Children | | | | | |
53
+ # v v v v v v
54
+ # Leaves +---------++-------++----------++-------++----------++-------+
55
+ # Values | A B C D || E F G || H I J K || L M || N O P || Q R |
56
+ #
57
+ # Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
58
+ #
59
+ class BigArrayNode < PEROBS::Object
60
+
61
+ attr_persist :tree, :parent, :offsets, :values, :children,
62
+ :prev_sibling, :next_sibling
63
+
64
# Internal constructor. Use Store.new(BigArrayNode, ...) instead.
# @param p [Handle]
# @param tree [BigArray] The tree this node should belong to
# @param is_leaf [Boolean] True if a leaf node should be created, false
#        for a branch node.
# @param parent [BigArrayNode] Parent node
# @param prev_sibling [BigArrayNode] Previous sibling (leaf nodes only)
# @param next_sibling [BigArrayNode] Next sibling (leaf nodes only)
def initialize(p, tree, is_leaf, parent = nil,
               prev_sibling = nil, next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent

  unless is_leaf
    # Branch node: offsets and child references, but no values and no
    # sibling links.
    self.offsets = @store.new(PEROBS::Array)
    self.children = @store.new(PEROBS::Array)
    self.values = nil
    self.prev_sibling = self.next_sibling = nil
    return
  end

  # Leaf node: values only.
  self.values = @store.new(PEROBS::Array)
  self.children = self.offsets = nil

  # Hook the new leaf into the chain of leaves. A missing neighbor on
  # either side means this node becomes the first respectively the last
  # leaf of the tree.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  else
    @tree.first_leaf = myself
  end

  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  else
    @tree.last_leaf = myself
  end
end
106
+
107
# A node without a children list is a leaf node.
# @return [Boolean] True if this is a leaf node, false otherwise.
def is_leaf?
  @children.nil?
end
111
+
112
# @return [Integer] number of values for a leaf node, number of children
#         for a branch node.
def size
  if is_leaf?
    @values.size
  else
    @children.size
  end
end
115
+
116
# Count the values below this node by following the last child on every
# level and adding up the last offsets on the way down.
# @return [Integer] the number of values stored in this node.
def values_count
  total = 0
  node = self
  while node
    return total + node.values.size if node.is_leaf?

    total += node.offsets.last
    node = node.children.last
  end
end
129
+
130
+
131
+ # Set the given value at the given index.
132
+ # @param index [Integer] Position to insert at
133
+ # @param value [Integer] value to insert
134
+ def set(index, value)
135
+ node = self
136
+
137
+ # Traverse the tree to find the right node to add or replace the value.
138
+ while node do
139
+ # Once we have reached a leaf node we can insert or replace the value.
140
+ if node.is_leaf?
141
+ if index >= node.values.size
142
+ node.fatal "Set index (#{index}) larger than values array " +
143
+ "(#{node.values.size})."
144
+ end
145
+ node.values[index] = value
146
+ return
147
+ else
148
+ # Descend into the right child node to add the value to.
149
+ cidx = node.search_child_index(index)
150
+ index -= node.offsets[cidx]
151
+ node = node.children[cidx]
152
+ end
153
+ end
154
+
155
+ node.fatal "Could not find proper node to set the value while " +
156
+ "looking for index #{index}"
157
+ end
158
+
159
# Insert the given value at the given index. All following values will be
# pushed to a higher index. Full nodes that are met on the way down are
# split before the descent continues.
# @param index [Integer] Position to insert at, relative to the first
#        value below this node
# @param value [Object] value to insert
def insert(index, value)
  node = self
  cidx = nil

  while node do
    # Every full node on the path is split into two half-full nodes.
    if node.size >= @tree.node_size
      # split_node() returns the parent, so the offset that was subtracted
      # when descending into this node must be added again.
      index += node.parent.offsets[cidx] if node.parent
      node = node.split_node
    end

    if node.is_leaf?
      node.values.insert(index, value)
      # All offsets that follow this leaf in the ancestors grow by one.
      node.parent.adjust_offsets(node, 1) if node.parent
      return
    else
      # Descend into the child node that should receive the value.
      cidx = node.search_child_index(index)
      if (index -= node.offsets[cidx]) < 0
        node.fatal "Index (#{index}) became negative"
      end
      node = node.children[cidx]
    end
  end

  # The loop only ends without returning when a child reference was nil,
  # so the message must be reported by this node.
  fatal "Could not find proper node to insert the value while " +
        "looking for index #{index}"
end
196
+
197
# Look up the value stored at the given index.
# @param index [Integer] Position of the value, relative to the first
#        value below this node
# @return [Object or nil] the stored value
def get(index)
  node = self

  # Walk down the tree until the leaf node that holds the index is found.
  while node
    return node.values[index] if node.is_leaf?

    # The child to descend into is the last one whose offset is not
    # larger than the index.
    hit = node.offsets.bsearch_index { |o| o > index }
    cidx = (hit || node.offsets.length) - 1
    index -= node.offsets[cidx]
    node.fatal "Index (#{index}) became negative" if index < 0
    node = node.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for index #{index}"
end
223
+
224
# Remove the value at the given index from the sub-tree below this node.
# When the affected leaf has a parent, the offsets of the ancestors are
# adjusted and an underfull leaf is handed to its parent for
# consolidation.
# @param index [Integer] Index relative to the first value below this node
# @return [Object] the removed value
def delete_at(index)
  node = self

  while node
    if node.is_leaf?
      removed = node.values.delete_at(index)
      if node.parent
        node.parent.adjust_offsets(node, -1)
        node.parent.consolidate_child_nodes(node) if node.size < min_size
      end

      return removed
    end

    # Descend into the last child whose offset is not larger than the
    # index.
    hit = node.offsets.bsearch_index { |o| o > index }
    cidx = (hit || node.offsets.length) - 1
    index -= node.offsets[cidx]
    node.fatal "Index (#{index}) became negative" if index < 0
    node = node.children[cidx]
  end

  PEROBS.log.fatal "Could not find proper node to delete from while " +
                   "looking for index #{index}"
end
257
+
258
# Yield every value of this leaf node in index order. Branch nodes hold
# no values; for them nil is returned without yielding.
# @yield [value]
def each
  return nil unless is_leaf?

  @values.each { |value| yield(value) }
end
267
+
268
# Yield every value of this leaf node from the last to the first. Branch
# nodes hold no values; for them nil is returned without yielding.
# @yield [value]
def reverse_each
  return nil unless is_leaf?

  @values.reverse_each { |value| yield(value) }
end
277
+
278
# Check consistency of the node and all subsequent nodes. In case an error
# is found, a message is logged and false is returned.
# @yield [index, value] Optional. Called for every value of every leaf
#        node. The check is aborted with false if the block returns a
#        false value.
# @return [Boolean] true if tree has no errors
def check
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # Nodes should have between min_size() and @tree.node_size children
      # or values. Only the root node may have less.
      if node.size > @tree.node_size
        node.error "BigArray node #{node._id} is too large. It has " +
                   "#{node.size} nodes instead of max. #{@tree.node_size}."
        return false
      end
      if node.parent && node.size < min_size
        node.error "BigArray node #{node._id} is too small"
        return false
      end

      if node.is_leaf?
        # All leaf nodes must have same distance from root node.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end

        return false unless node.check_leaf_node_links

        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        unless node.children.size == node.offsets.size
          node.error "Offset count (#{node.offsets.size}) must be equal " +
                     "to children count (#{node.children.size})"
          return false
        end

        if node.values
          node.error "values must be nil for a branch node"
          return false
        end

        # The sibling links of the visited node must be inspected, not
        # those of the node check() was called on.
        unless node.prev_sibling.nil? && node.next_sibling.nil?
          node.error "prev_sibling and next_sibling must be nil for " +
                     "branch nodes"
          return false
        end

        return false unless node.check_offsets

        return false unless node.check_child_nodes(stack)
      end
    elsif position <= node.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf?
        if block_given?
          # If a block was given, call this block with the index and value.
          return false unless yield(node.first_index + index,
                                    node.values[index])
        end
      end
    end
  end

  true
end
354
+
355
# Verify the sibling links of this leaf node. A leaf without previous
# sibling must be the first leaf of the tree, a leaf without next sibling
# must be the last one, and every existing neighbor must point back to
# this node. The first problem found is logged.
# @return [Boolean] true if the links are consistent, false otherwise
def check_leaf_node_links
  problem = nil
  if @prev_sibling.nil?
    if @tree.first_leaf != self
      problem = "Leaf node #{@_id} has no previous sibling " +
                "but is not the first leaf of the tree"
    end
  elsif @prev_sibling.next_sibling != self
    problem = "next_sibling of previous sibling does not point to " +
              "this node"
  end
  if problem
    error problem
    return false
  end

  if @next_sibling.nil?
    if @tree.last_leaf != self
      problem = "Leaf node #{@_id} has no next sibling " +
                "but is not the last leaf of the tree"
    end
  elsif @next_sibling.prev_sibling != self
    problem = "previous_sibling of next sibling does not point to " +
              "this node"
  end
  if problem
    error problem
    return false
  end

  true
end
382
+
383
# Verify the offsets list of this branch node. The first offset must be 0
# and every following offset must be the previous offset plus the number
# of values stored below the previous child. An empty root node is
# accepted. The first problem found is logged.
# @return [Boolean] true if the offsets are consistent, false otherwise
def check_offsets
  return true if @parent.nil? && @offsets.empty?

  if @offsets[0] != 0
    error "First offset is not 0: #{@offsets.inspect}"
    return false
  end

  1.upto(@offsets.size - 1) do |i|
    previous = @offsets[i - 1]
    current = @offsets[i]

    if current < previous
      error "Offset are not strictly monotoneously " +
            "increasing: #{@offsets.inspect}"
      return false
    end

    expected = previous + @children[i - 1].values_count
    if current != expected
      error "Offset #{i} must be #{expected} " +
            "but is #{current}."
      return false
    end
  end

  true
end
412
+
413
# Verify the child references of this branch node. Every child must be a
# distinct BigArrayNode that has this node as parent and is neither this
# node itself nor one of its ancestors. The first problem found is logged.
# @param stack [Array] the ancestor nodes on the path from the root
# @return [Boolean] true if all children are consistent, false otherwise
def check_child_nodes(stack)
  if @children.uniq.size != @children.size
    error "Node #{@_id} has multiple identical children"
    return false
  end

  @children.each_with_index do |child, i|
    unless child.is_a?(BigArrayNode)
      error "Child #{i} of node #{@_id} is of class #{child.class} " +
            "instead of BigArrayNode"
      return false
    end

    unless child.parent.is_a?(BigArrayNode)
      # Report the class of the parent reference, not of the child.
      error "Parent reference of child #{i} is of class " +
            "#{child.parent.class} instead of BigArrayNode"
      return false
    end

    if child.parent != self
      error "Child node #{child._id} has wrong parent " +
            "#{child.parent._id}. It should be #{@_id}."
      return false
    end

    if child == self
      error "Child #{i} point to self"
      return false
    end

    if stack.include?(child)
      error "Child #{i} points to ancester node"
      return false
    end
  end

  true
end
457
+
458
# Render the sub-tree below this node as text, one line per node and one
# line per non-nil leaf value. Exceptions raised while rendering a line
# are caught and shown as a marker line.
# @return [String] Human readable form of the sub-tree.
def to_s
  lines = []

  traverse do |node, position, _stack|
    begin
      if position == 0
        # Header line of the node.
        lines << "#{node.parent ? node.parent.tree_prefix + ' +' : 'o'}" +
                 "#{node.tree_branch_mark}-" +
                 "#{node.size == 0 ? '--' : 'v-'}#{node.tree_summary}\n"
      elsif node.is_leaf? && node.values[position - 1]
        # One line for the value with node-local index 'position - 1'.
        lines << "#{node.tree_prefix}  " +
                 "#{position == node.size ? '-' : '|'} " +
                 "[ #{node.value_index(position - 1)}: " +
                 "#{node.values[position - 1]} ]\n"
      end
    rescue => e
      lines << "@@@@@@@@@@: #{e.message}\n"
    end
  end

  lines.join
end
489
+
490
# Split this node into two nodes. The upper half of the elements is moved
# into a newly created sibling that is registered in the parent right
# after this node. This node keeps the lower half. If this node is the
# root, a new root is created first.
# @return [BigArrayNode] common parent of the two nodes
def split_node
  if @parent.nil?
    # Splitting the root: put a new branch node on top that has this node
    # as its only child.
    self.parent = @store.new(BigArrayNode, @tree, false)
    @parent.offsets[0] = 0
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The new node is of the same kind (leaf or branch) as this node.
  sibling = @store.new(BigArrayNode, @tree, is_leaf?, @parent, myself,
                       @next_sibling)
  # Position of the first element that moves to the sibling.
  mid = size / 2

  if is_leaf?
    # Leaf split, e. g. the first leaf below offsets 0 4 7:
    #
    #   | A B C D || E F G || H I J K |
    #
    # becomes, with parent offsets 0 2 4 7:
    #
    #   | A B || C D || E F G || H I J K |
    #
    # The sibling starts 'mid' values after this node.
    @parent.insert_child_after_peer(mid, sibling, self)
    # Hand the upper half of the values over to the sibling ...
    sibling.values += @values[mid..-1]
    # ... and drop them from this node.
    @values.slice!(mid..-1)
  else
    # Branch split, e. g. the first child below offsets 0 11:
    #
    #   | 0 4 7 10 || 0 2 5 |
    #
    # becomes, with parent offsets 0 7 11:
    #
    #   | 0 4 || 0 3 || 0 2 5 |
    #
    # The sibling starts where the middle child of this node started.
    base = @offsets[mid]
    @parent.insert_child_after_peer(base, sibling, self)
    # The moved offsets become relative to the sibling.
    sibling.offsets += @offsets[mid..-1].map { |o| o - base }
    @offsets.slice!(mid..-1)
    # Move the matching children and make the sibling their parent.
    sibling.children += @children[mid..-1]
    sibling.children.each { |c| c.parent = sibling }
    @children.slice!(mid..-1)
  end

  @parent
end
574
+
575
# Register a new child node directly after an existing child node.
# @param offset [Integer] distance of the new child's offset from the
#        offset of the peer
# @param node [BigArrayNode] the child node to add
# @param peer [BigArrayNode] existing child that the new node must follow
def insert_child_after_peer(offset, node, peer = nil)
  peer_index = @children.find_index(peer)
  # Without a known peer there is no base offset to derive the new offset
  # from. Report this explicitly instead of failing on a nil index.
  unless peer_index
    fatal "Cannot find peer node to insert the new child after"
  end

  cidx = peer_index + 1
  @offsets.insert(cidx, @offsets[peer_index] + offset)
  @children.insert(cidx, node)
end
581
+
582
# Rebalance an underfull child node with one of its direct neighbors. The
# first child is combined with its successor, all other children with
# their predecessor. If both nodes fit into one node they are merged,
# otherwise one element is moved over from the neighbor.
# @param child [BigArrayNode] the underfull child node
def consolidate_child_nodes(child)
  child_index = @children.index(child)
  # A child that is not in the children list cannot be rebalanced.
  # Continuing would only fail later on the nil index.
  fatal "Cannot find child to consolidate" unless child_index

  if child_index == 0
    # Consolidate with successor if it exists.
    return unless (succ = @children[child_index + 1])

    if child.size + succ.size <= @tree.node_size
      # merge child with successor
      merge_child_with_next(child_index)
    else
      move_first_element_of_successor_to_child(child_index)
    end
  else
    # consolidate with predecessor
    pred = @children[child_index - 1]

    if pred.size + child.size <= @tree.node_size
      # merge child with predecessor
      merge_child_with_next(child_index - 1)
    else
      move_last_element_of_predecessor_to_child(child_index)
    end
  end
end
609
+
610
# Find the child that an element with the given offset should be added
# to. An offset that is equal to the start offset of a child selects the
# preceding child.
# @param offset [Integer] offset to search the child index for
# @return [Integer] Index of the matching child or 0 if the offsets list
#         is empty.
def search_child_index(offset)
  # An empty offsets list and everything up to the first offset map to
  # the first child.
  return 0 if @offsets.empty?
  return 0 if offset <= @offsets.first

  # Step back by one from the first entry that is not smaller than the
  # offset, or from the end of the list if there is no such entry.
  upper = @offsets.bsearch_index { |o| o >= offset }
  (upper || @offsets.length) - 1
end
618
+
619
# @return [Integer or nil] The position of this node in the children list
#         of its parent node. For the root node nil is returned.
def index_in_parent_node
  @parent ? @parent.children.find_index(self) : nil
end
626
+
627
# Compute the BigArray index of the first value stored below this node.
# That index is the sum of the offsets that the ancestors store for the
# nodes on the path from this node up to the root.
# @return [Integer] index of the first value, 0 for the root node
def first_index
  # TODO: This is a very expensive method. Find a way to make this way
  # faster.
  index = 0
  child = myself
  node = parent
  while node
    # A child that is missing from the children list contributes nothing.
    cidx = node.children.index(child)
    index += node.offsets[cidx] if cidx
    child = node
    node = node.parent
  end

  index
end
642
+
643
# Translate a node-local value index into the index within the whole
# BigArray by adding the offsets that all ancestors store for the path to
# this node.
# @param idx [Integer] Index of the value in the current node
# @return [Integer] Array index of the value
def value_index(idx)
  total = idx
  node = self
  while (up = node.parent)
    total += up.offsets[node.index_in_parent_node]
    node = up
  end

  total
end
656
+
657
+ # This method takes care of adjusting the offsets in the tree in case elements
658
+ # were inserted or removed. All nodes that hold children after the
659
+ # insert/remove operation need to be adjusted. Since child nodes get their
660
+ # offsets via their parents, only the parent node and the direct ancestor
661
+ # followers need to be adjusted.
662
+ # @param after_child [BigArrayNode] specifies the modified leaf node
663
+ # @param delta [Integer] specifies how many elements were inserted or
664
+ # removed.
665
def adjust_offsets(after_child, delta)
  current = self

  while current
    count = current.children.size
    # Locate the modified child within the current node.
    found_at = (0...count).find { |i| current.children[i] == after_child }

    if found_at
      # Only the children that follow the modified one start at a different
      # position now. Their offsets are shifted by delta.
      (found_at + 1).upto(count - 1) { |i| current.offsets[i] += delta }
    else
      current.fatal "Could not find child #{after_child._id}"
    end

    # Continue one level up. There the node just processed is the child
    # whose followers need to be adjusted.
    after_child = current
    current = current.parent
  end
end
689
+
690
+ # This is a generic tree iterator. It yields before it descends into the
691
+ # child node and after (which is identical to before the next child
692
+ # descend). It yields the node, the position and the stack of parent
693
+ # nodes.
694
+ # @yield [node, position, stack]
695
def traverse
  # The traversal is iterative. This stack holds the [ node, position ]
  # pairs that still have to be visited.
  pending = [ [ self, 0 ] ]

  until pending.empty?
    node, position = pending.pop

    # Hand the current state to the caller. Position 0 means the node was
    # just entered and the first child is about to be descended into.
    # Position 1 is reached after the 1st child was processed and before
    # the 2nd child is processed, and so on.
    yield(node, position, pending)

    # Positions beyond the node size end the visit of this node.
    next if position > node.size

    # Remember to come back to this node at the following position.
    pending.push([ node, position + 1 ])
    next if node.is_leaf?

    # Branch nodes descend into the child at this position if there is one.
    child = node.children[position]
    pending.push([ child, 0 ]) if child
  end
end
724
+
725
+ # Gather some statistics about the node and all sub nodes.
726
+ # @param stats [Stats] Data structure that stores the gathered data
727
def statistics(stats)
  # Counts leaf and branch nodes and records the smallest and largest leaf
  # depth found below (and including) this node. The stats object must
  # provide leaf_nodes, branch_nodes, min_depth and max_depth accessors.
  traverse do |node, position, stack|
    # Every node is counted exactly once: when it is entered (position 0).
    next unless position == 0

    unless node.is_leaf?
      stats.branch_nodes += 1
      next
    end

    stats.leaf_nodes += 1
    # The traversal stack holds one pending entry per ancestor of the
    # current node, so the leaf depth is the stack size plus the leaf.
    depth = stack.size + 1
    # Keep the smallest depth in min_depth and the largest in max_depth.
    # The comparisons used to be inverted, which made each value track the
    # opposite extreme.
    if stats.min_depth.nil? || depth < stats.min_depth
      stats.min_depth = depth
    end
    if stats.max_depth.nil? || depth > stats.max_depth
      stats.max_depth = depth
    end
  end
end
745
+
746
+ # Return the decoration that marks the tree structure of this node for the
747
+ # inspection method.
748
def tree_prefix
  prefix = ''
  current = self

  # Walk from this node up to the root and prepend one decoration per
  # level. The top-level node has no parent and contributes nothing.
  while (upper = current.parent)
    # No vertical line is drawn below the last child of a node.
    is_last_child = upper.children.last == current
    prefix = (is_last_child ? ' ' : ' |') + prefix
    current = upper
  end

  prefix
end
767
+
768
+ # Branch node decoration for the inspection method.
769
def tree_branch_mark
  # Only nodes that hang off a parent get a branch mark.
  @parent ? '-' : ''
end
773
+
774
+ # Text for the node line for the inspection method.
775
def tree_summary
  # The summary always starts with the ID of this node. A section is added
  # for the parent, the previous sibling and the next sibling if they are
  # set. When reading one of them raises an error, a placeholder ending in
  # '@' is added instead so that the inspection output can still be
  # generated.
  summary = " @#{@_id}"

  if @parent
    summary += begin
      " +#{@parent.offsets[index_in_parent_node]} ^#{@parent._id}"
    rescue
      ' ^@'
    end
  end

  if @prev_sibling
    summary += begin
      " <#{@prev_sibling._id}"
    rescue
      ' <@'
    end
  end

  if @next_sibling
    summary += begin
      " >#{@next_sibling._id}"
    rescue
      ' >@'
    end
  end

  summary
end
801
+
802
+ # Print and log an error message for the node.
803
def error(msg)
  # The report consists of a header line naming this node followed by the
  # textual form of the whole tree. It is written to $stderr and to the
  # PEROBS log.
  report = "Error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.error report
end
808
+
809
+ # Print and log a fatal error message for the node.
810
def fatal(msg)
  # The report consists of a header line naming this node followed by the
  # textual form of the whole tree. It is written to $stderr and passed to
  # the fatal channel of the PEROBS log.
  report = "Fatal error in BigArray node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.fatal report
end
815
+
816
+ private
817
+
818
# @return Half of the node size configured for the tree. Nodes (other than
#         the root) that shrink below this size get consolidated.
def min_size
  capacity = @tree.node_size
  capacity / 2
end
821
+
822
+ # Move first element of successor to end of child node
823
+ # @param child_index [Integer] index of the child
824
def move_first_element_of_successor_to_child(child_index)
  child = @children[child_index]
  succ = @children[child_index + 1]

  if child.is_leaf?
    # The successor loses its first value, so it starts one index later.
    @offsets[child_index + 1] += 1
    # Move the value
    child.values << succ.values.shift
  else
    # Offsets are relative to the start of the node that stores them.
    #
    # Example with child_index 0 (leaf values shown per sub-tree):
    #
    #   Before:
    #     this node   offsets [ 0, 7 ]
    #     child       offsets [ 0, 4 ]        (A B C D) (E F G)
    #     successor   offsets [ 0, 4, 6, 9 ]  (H I J K) (L M) (N O P) (Q R)
    #
    #   After:
    #     this node   offsets [ 0, 11 ]
    #     child       offsets [ 0, 4, 7 ]     (A B C D) (E F G) (H I J K)
    #     successor   offsets [ 0, 2, 5 ]     (L M) (N O P) (Q R)
    #
    # Drop the leading 0 offset of the successor. The offset that follows
    # is the number of elements in the sub-tree being moved and becomes the
    # delta for all remaining successor offsets and for this node.
    succ.offsets.shift
    delta = succ.offsets.first
    succ.offsets.map! { |o| o - delta }
    # The moved sub-tree starts right after the current content of child.
    # Relative to the start of child this is the distance between the
    # offsets of child and successor in this node. Using the successor
    # offset alone is only correct when child is the first child.
    child.offsets << (@offsets[child_index + 1] - @offsets[child_index])
    # The successor now starts delta elements later.
    @offsets[child_index + 1] += delta
    # Move the child reference
    child.children << succ.children.shift
    child.children.last.parent = child
  end
end
880
+
881
+ # Move last element of predecessor node to child
882
+ # @param child_index [Integer] index of the child
883
def move_last_element_of_predecessor_to_child(child_index)
  pred = @children[child_index - 1]
  child = @children[child_index]

  if child.is_leaf?
    # The child gains one value at the front, so it starts one index
    # earlier.
    @offsets[child_index] -= 1
    # Move the value
    child.values.unshift(pred.values.pop)
  else
    # Offsets are relative to the start of the node that stores them.
    #
    # Example with child_index 1 (leaf values shown per sub-tree):
    #
    #   Before:
    #     this node    offsets [ 0, 13 ]
    #     predecessor  offsets [ 0, 4, 7, 11 ] (A B C D) (E F G) (H I J K) (L M)
    #     child        offsets [ 0, 3 ]        (N O P) (Q R)
    #
    #   After:
    #     this node    offsets [ 0, 11 ]
    #     predecessor  offsets [ 0, 4, 7 ]     (A B C D) (E F G) (H I J K)
    #     child        offsets [ 0, 2, 5 ]     (L M) (N O P) (Q R)
    #
    # The moved sub-tree starts at the last predecessor offset. That offset
    # is relative to the predecessor, so the start of the predecessor within
    # this node has to be added to get the new start of the child. Leaving
    # it out is only correct when the predecessor is the first child.
    new_start = @offsets[child_index - 1] + pred.offsets.pop
    # Number of elements in the moved sub-tree
    delta = @offsets[child_index] - new_start
    @offsets[child_index] = new_start
    # All existing sub-trees of the child start delta elements later.
    child.offsets.map! { |o| o + delta }
    # And prepend the 0 offset for the moved sub-tree
    child.offsets.unshift(0)
    # Move the child reference
    child.children.unshift(pred.children.pop)
    child.children.first.parent = child
  end
end
936
+
937
# Merge the child at child_index with the child that follows it. The merged
# content ends up in the first of the two nodes and the second one is
# removed from this node.
# @param child_index [Integer] index of the first of the two children
def merge_child_with_next(child_index)
  next_index = child_index + 1
  keeper = @children[child_index]
  absorbed = @children[next_index]

  if keeper.is_leaf?
    # Take the absorbed leaf out of the sibling chain.
    follower = absorbed.next_sibling
    keeper.next_sibling = follower
    follower.prev_sibling = keeper if follower

    keeper.values += absorbed.values
    # Without a following sibling the kept leaf is now the last leaf of
    # the tree.
    @tree.last_leaf = keeper unless follower
  else
    # Offsets are relative to the start of the node that stores them.
    #
    # Example with child_index 0 (leaf values shown per sub-tree):
    #
    #   Before:
    #     this node  offsets [ 0, 11 ]
    #     c1         offsets [ 0, 4, 7 ]      (A B C D) (E F G) (H I J K)
    #     c2         offsets [ 0 ]            (L M)
    #
    #   After:
    #     this node  offsets [ 0 ]
    #     c1         offsets [ 0, 4, 7, 11 ]  (A B C D) (E F G) (H I J K) (L M)
    #
    # The distance between the two children in this node is what the
    # offsets of the absorbed node have to be shifted by.
    shift_by = @offsets[next_index] - @offsets[child_index]
    keeper.offsets += absorbed.offsets.map { |o| o + shift_by }
    absorbed.children.each { |grandchild| grandchild.parent = keeper }
    keeper.children += absorbed.children
  end

  # Remove the child successor from the node.
  @offsets.delete_at(next_index)
  @children.delete_at(next_index)

  # The removal may have left this node too small. In that case the parent
  # has to consolidate it with one of its siblings.
  @parent.consolidate_child_nodes(self) if @parent && size < min_size
end
994
+
995
+ end
996
+
997
+ end
998
+