perobs 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a988f009b1b97bc8da2b2bcfeb65399284a07032
4
- data.tar.gz: 4346d5cdbec5b4154741f0a41d3f526985803912
3
+ metadata.gz: 4da7810046e0984b00152ab2205ebb2e2d364b94
4
+ data.tar.gz: 74c48af35b674713b222d0dff1b7f0c3ef070b45
5
5
  SHA512:
6
- metadata.gz: 639673c10faa5082c742258ee9848f5e951c6d1ec7fc9f03ec7066260ea5042b6c8b90020df6fa506b8be54cc5fd9793a0939f80ef1da726c27832c8a2eb8c85
7
- data.tar.gz: 92e63145a1fc9b6e76dbdf6f4cdebfb5a8935bfcbd93a9634e22c12f874b945b656a17fe93aa03a7a3189a00371d2e2abd8d411862ad0ad51b0c364f7b7dc027
6
+ metadata.gz: 5746476c388361dee6ac06094e7545e86ecb0569368c089e21429e4e6c3dbf92d8db5e7854249d7214c162f15a629306092dd0ca8bfbceb85eda7c08df95cf90
7
+ data.tar.gz: a495a6127f26bdc7326f1bd7ea3671ff9add499d0b455a9334c884c23a2960ee963eedb8d2bcb5c3f257af9dde4895bebd458a96e7c079dbd0752028e9200614
@@ -27,3 +27,4 @@
27
27
 
28
28
  require "perobs/version"
29
29
  require 'perobs/Store'
30
+ require 'perobs/ConsoleProgressMeter'
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Array.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -44,19 +44,27 @@ module PEROBS
44
44
 
45
45
  attr_reader :data
46
46
 
47
- # These methods do not mutate the Array. They only perform read
48
- # operations.
47
+ # These methods do not mutate the Array but create a new PEROBS::Array
48
+ # object. They only perform read operations.
49
49
  ([
50
- :&, :*, :+, :-, :==, :[], :<=>, :at, :abbrev, :assoc, :bsearch, :collect,
51
- :combination, :compact, :count, :cycle, :dclone, :drop, :drop_while,
50
+ :|, :&, :+, :-, :collect, :compact, :drop, :drop_while,
51
+ :flatten, :map, :reject, :reverse, :rotate, :select, :shuffle, :slice,
52
+ :sort, :take, :take_while, :uniq, :values_at
53
+ ] + Enumerable.instance_methods).uniq.each do |method_sym|
54
+ define_method(method_sym) do |*args, &block|
55
+ @store.cache.cache_read(self)
56
+ @store.new(PEROBS::Array, @data.send(method_sym, *args, &block))
57
+ end
58
+ end
59
+
60
+ # These methods do not mutate the Array and only perform read operations.
61
+ # They do not return basic object types.
62
+ ([
63
+ :==, :[], :<=>, :at, :bsearch, :bsearch_index, :count, :cycle,
52
64
  :each, :each_index, :empty?, :eql?, :fetch, :find_index, :first,
53
- :flatten, :frozen?, :hash, :include?, :index, :join, :last,
54
- :length, :map, :pack, :permutation, :pretty_print, :pretty_print_cycle,
55
- :product, :rassoc, :reject, :repeated_combination,
56
- :repeated_permutation, :reverse, :reverse_each, :rindex, :rotate,
57
- :sample, :select, :shelljoin, :shuffle, :size, :slice, :sort, :take,
58
- :take_while, :to_a, :to_ary, :to_s, :transpose, :uniq, :values_at, :zip,
59
- :|
65
+ :frozen?, :include?, :index, :join, :last, :length, :pack,
66
+ :pretty_print, :pretty_print_cycle, :reverse_each, :rindex, :sample,
67
+ :size, :to_a, :to_ary, :to_s
60
68
  ] + Enumerable.instance_methods).uniq.each do |method_sym|
61
69
  define_method(method_sym) do |*args, &block|
62
70
  @store.cache.cache_read(self)
@@ -64,12 +72,23 @@ module PEROBS
64
72
  end
65
73
  end
66
74
 
75
+ # These methods mutate the Array and return self.
76
+ [
77
+ :<<, :clear, :collect!, :compact!, :concat,
78
+ :fill, :flatten!, :insert, :keep_if, :map!, :push,
79
+ :reject!, :replace, :select!, :reverse!, :rotate!, :shuffle!,
80
+ :slice!, :sort!, :sort_by!, :uniq!
81
+ ].each do |method_sym|
82
+ define_method(method_sym) do |*args, &block|
83
+ @store.cache.cache_write(self)
84
+ @data.send(method_sym, *args, &block)
85
+ myself
86
+ end
87
+ end
88
+
67
89
  # These methods mutate the Array.
68
90
  [
69
- :<<, :[]=, :clear, :collect!, :compact!, :concat, :delete, :delete_at,
70
- :delete_if, :fill, :flatten!, :insert, :keep_if, :map!, :pop, :push,
71
- :reject!, :replace, :select!, :reverse!, :rotate!, :shift, :shuffle!,
72
- :slice!, :sort!, :sort_by!, :uniq!, :unshift
91
+ :delete, :delete_at, :delete_if, :shift, :pop
73
92
  ].each do |method_sym|
74
93
  define_method(method_sym) do |*args, &block|
75
94
  @store.cache.cache_write(self)
@@ -81,17 +100,45 @@ module PEROBS
81
100
  # PEROBS users should never call this method or equivalents of derived
82
101
  # methods directly.
83
102
  # @param p [PEROBS::Handle] PEROBS handle
84
- # @param size [Integer] The requested size of the Array
103
+ # @param arg1 [Integer or Array] The requested size of the Array or an
104
+ # Array to initialize
85
105
  # @param default [Any] The default value that is returned when no value is
86
106
  # stored for a specific key.
87
- def initialize(p, size = 0, default = nil)
107
+ def initialize(p, arg1 = 0, default = nil, &block)
88
108
  super(p)
89
- @data = ::Array.new(size, default)
109
+ if arg1.is_a?(::Array)
110
+ arg1.each { |v| _check_assignment_value(v) }
111
+ @data = arg1.dup
112
+ elsif block_given?
113
+ @data = ::Array.new(arg1) do
114
+ _check_assignment_value(yield)
115
+ end
116
+ else
117
+ @data = ::Array.new(arg1, _check_assignment_value(default))
118
+ end
90
119
 
91
120
  # Ensure that the newly created object will be pushed into the database.
92
121
  @store.cache.cache_write(self)
93
122
  end
94
123
 
124
+ # Proxy for the assignment method.
125
+ def []=(*args)
126
+ if (args.length == 2)
127
+ _check_assignment_value(args[1])
128
+ else
129
+ _check_assignment_value(args[2])
130
+ end
131
+ @store.cache.cache_write(self)
132
+ @data.[]=(*args)
133
+ end
134
+
135
+ # Proxy for the unshift method.
136
+ def unshift(val)
137
+ _check_assignment_value(val)
138
+ @store.cache.cache_write(self)
139
+ @data.unshift(val)
140
+ end
141
+
95
142
  # Return a list of all object IDs of all persistent objects that this Array
96
143
  # is referencing.
97
144
  # @return [Array of Integer] IDs of referenced objects
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = BTreeNode.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -32,7 +32,7 @@ require 'perobs/BTreeNode'
32
32
 
33
33
  module PEROBS
34
34
 
35
- # This BTree class is very similar to a classic BTree implementation. It
35
+ # This BTree class is very similar to a classic B+Tree implementation. It
36
36
  # manages a tree that is always balanced. The BTree is stored in the
37
37
  # specified directory and partially kept in memory to speed up operations.
38
38
  # The order of the tree specifies how many keys each node will be able to
@@ -40,16 +40,19 @@ module PEROBS
40
40
  # have N + 1 references to child nodes instead.
41
41
  class BTree
42
42
 
43
- attr_reader :order, :nodes, :node_cache
43
+ attr_reader :order, :nodes, :node_cache, :first_leaf, :last_leaf, :size
44
44
 
45
45
  # Create a new BTree object.
46
46
  # @param dir [String] Directory to store the tree file
47
47
  # @param name [String] Base name of the BTree related files in 'dir'
48
48
  # @param order [Integer] The maximum number of keys per node. This number
49
49
  # must be odd and larger than 2 and smaller than 2**16 - 1.
50
- def initialize(dir, name, order)
50
+ # @param progressmeter [ProgressMeter] reference to a ProgressMeter object
51
+ def initialize(dir, name, order, progressmeter)
51
52
  @dir = dir
52
53
  @name = name
54
+ @progressmeter = progressmeter
55
+
53
56
  unless order > 2
54
57
  PEROBS.log.fatal "BTree order must be larger than 2, not #{order}"
55
58
  end
@@ -62,9 +65,14 @@ module PEROBS
62
65
  @order = order
63
66
 
64
67
  # This EquiBlobsFile contains the nodes of the BTree.
65
- @nodes = EquiBlobsFile.new(@dir, @name,
68
+ @nodes = EquiBlobsFile.new(@dir, @name, @progressmeter,
66
69
  BTreeNode::node_bytes(@order))
67
- @node_cache = PersistentObjectCache.new(512, BTreeNode, self)
70
+ @nodes.register_custom_data('first_leaf')
71
+ @nodes.register_custom_data('last_leaf')
72
+ @nodes.register_custom_data('btree_size')
73
+ @node_cache = PersistentObjectCache.new(16384, 5000, BTreeNode, self)
74
+ @root = @first_leaf = @last_leaf = nil
75
+ @size = 0
68
76
 
69
77
  # This BTree implementation uses a write cache to improve write
70
78
  # performance of multiple successive read/write operations. This also
@@ -89,23 +97,46 @@ module PEROBS
89
97
 
90
98
  @node_cache.clear
91
99
  @nodes.open
92
- node = @nodes.total_entries == 0 ?
93
- BTreeNode::create(self) :
94
- BTreeNode::load(self, @nodes.first_entry)
100
+
101
+ if @nodes.total_entries == 0
102
+ # We've created a new nodes file
103
+ node = BTreeNode::create(self)
104
+ else
105
+ # We are loading an existing tree.
106
+ node = BTreeNode::load_and_link(self, @nodes.first_entry)
107
+ @first_leaf = BTreeNode::load_and_link(
108
+ self, @nodes.get_custom_data('first_leaf'))
109
+ @last_leaf = BTreeNode::load_and_link(
110
+ self, @nodes.get_custom_data('last_leaf'))
111
+ end
95
112
  set_root(node)
113
+
114
+ # Get the total number of entries that are stored in the tree.
115
+ @size = @nodes.get_custom_data('btree_size')
96
116
  end
97
117
 
98
118
  # Close the tree file.
99
119
  def close
120
+
121
+ def val_perc(value, total)
122
+ "#{value} (#{(value.to_f / total*100.0).to_i}%)"
123
+ end
124
+
100
125
  sync
101
126
  @nodes.close
102
127
  @root = nil
103
128
  end
104
129
 
130
+ # @return true if file is currently open
131
+ def is_open?
132
+ !@root.nil?
133
+ end
134
+
105
135
  # Clear all pools and forget any registered spaces.
106
136
  def clear
107
137
  @node_cache.clear
108
138
  @nodes.clear
139
+ @size = 0
109
140
  set_root(BTreeNode::create(self))
110
141
  end
111
142
 
@@ -114,6 +145,7 @@ module PEROBS
114
145
  # all stored data from the BTree.
115
146
  def erase
116
147
  @nodes.erase
148
+ @size = 0
117
149
  @root = nil
118
150
  @dirty_flag.forced_unlock
119
151
  end
@@ -121,6 +153,7 @@ module PEROBS
121
153
  # Flush all pending modifications into the tree file.
122
154
  def sync
123
155
  @node_cache.flush(true)
156
+ @nodes.set_custom_data('btree_size', @size)
124
157
  @nodes.sync
125
158
  @dirty_flag.unlock if @dirty_flag.is_locked?
126
159
  end
@@ -128,21 +161,54 @@ module PEROBS
128
161
  # Check if the tree file contains any errors.
129
162
  # @return [Boolean] true if no errors were found, false otherwise
130
163
  def check(&block)
131
- @root.check(&block)
164
+ sync
165
+ return false unless @nodes.check
166
+
167
+ entries = 0
168
+ res = true
169
+ @progressmeter.start('Checking index structure', @size) do |pm|
170
+ res = @root.check do |k, v|
171
+ pm.update(entries += 1)
172
+ block_given? ? yield(k, v) : true
173
+ end
174
+ end
175
+
176
+ unless entries == @size
177
+ PEROBS.log.error "The BTree size (#{@size}) and the number of " +
178
+ "found entries (#{entries}) don't match"
179
+ return false
180
+ end
181
+
182
+ res
132
183
  end
133
184
 
134
185
  # Register a new node as root node of the tree.
186
+ # @param node [BTreeNode]
135
187
  def set_root(node)
136
188
  @root = node
137
189
  @nodes.first_entry = node.node_address
138
190
  end
139
191
 
192
+ # Set the address of the first leaf node.
193
+ # @param node [BTreeNode]
194
+ def set_first_leaf(node)
195
+ @first_leaf = node
196
+ @nodes.set_custom_data('first_leaf', node.node_address)
197
+ end
198
+
199
+ # Set the address of the last leaf node.
200
+ # @param node [BTreeNode]
201
+ def set_last_leaf(node)
202
+ @last_leaf = node
203
+ @nodes.set_custom_data('last_leaf', node.node_address)
204
+ end
205
+
140
206
  # Insert a new value into the tree using the key as a unique index. If the
141
207
  # key already exists the old value will be overwritten.
142
208
  # @param key [Integer] Unique key
143
209
  # @param value [Integer] value
144
210
  def insert(key, value)
145
- @root.insert(key, value)
211
+ @size += 1 if @root.insert(key, value)
146
212
  @node_cache.flush
147
213
  end
148
214
 
@@ -157,7 +223,7 @@ module PEROBS
157
223
  # Find and remove the value associated with the given key. If no entry was
158
224
  # found, return nil, otherwise the found value.
159
225
  def remove(key)
160
- removed_value = @root.remove(key)
226
+ @size -= 1 unless (removed_value = @root.remove(key)).nil?
161
227
 
162
228
  # Check if the root node only contains one child link after the delete
163
229
  # operation. Then we can delete that node and pull the tree one level
@@ -187,6 +253,11 @@ module PEROBS
187
253
  @nodes.delete_blob(address)
188
254
  end
189
255
 
256
+ # @return [Integer] The number of entries stored in the tree.
257
+ def entries_count
258
+ @size
259
+ end
260
+
190
261
  # @return [String] Human readable form of the tree.
191
262
  def to_s
192
263
  @root.to_s
@@ -33,7 +33,7 @@ require 'perobs/RobustFile'
33
33
  module PEROBS
34
34
 
35
35
  # This class manages the usage of the data blobs in the corresponding
36
- # HashedBlobsDB object.
36
+ # BTreeDB object.
37
37
  class BTreeBlob
38
38
 
39
39
  # Magic number used for index files.
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = BTreeDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -58,7 +58,7 @@ module PEROBS
58
58
  # nodes. The insert/find/delete time grows
59
59
  # linearly with the size.
60
60
  def initialize(db_name, options = {})
61
- super(options[:serializer] || :json)
61
+ super(options)
62
62
 
63
63
  @db_dir = db_name
64
64
  # Create the database directory if it doesn't exist yet.
@@ -39,7 +39,8 @@ module PEROBS
39
39
  # mark a node as leaf or branch node.
40
40
  class BTreeNode
41
41
 
42
- attr_reader :node_address, :parent, :is_leaf, :keys, :values, :children
42
+ attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling,
43
+ :keys, :values, :children
43
44
 
44
45
  # Create a new BTreeNode object for the given tree with the given parent
45
46
  # or recreate the node with the given node_address from the backing store.
@@ -48,18 +49,23 @@ module PEROBS
48
49
  # restore the node.
49
50
  # @param tree [BTree] The tree this node is part of
50
51
  # @param parent [BTreeNode] reference to parent node
52
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
53
+ # @param next_sibling [BTreeNode] reference to next sibling node
51
54
  # @param node_address [Integer] the address of the node to read from the
52
55
  # backing store
53
56
  # @param is_leaf [Boolean] true if the node should be a leaf node, false
54
57
  # if not
55
58
  def initialize(tree, node_address = nil, parent = nil, is_leaf = true,
59
+ prev_sibling = nil, next_sibling = nil,
56
60
  keys = [], values = [], children = [])
57
61
  @tree = tree
58
62
  if node_address == 0
59
63
  PEROBS.log.fatal "Node address may not be 0"
60
64
  end
61
65
  @node_address = node_address
62
- @parent = parent ? BTreeNodeLink.new(tree, parent) : nil
66
+ @parent = link(parent)
67
+ @prev_sibling = link(prev_sibling)
68
+ @next_sibling = link(next_sibling)
63
69
  @keys = keys
64
70
  if (@is_leaf = is_leaf)
65
71
  @values = values
@@ -68,18 +74,6 @@ module PEROBS
68
74
  @children = children
69
75
  @values = []
70
76
  end
71
-
72
- ObjectSpace.define_finalizer(
73
- self, BTreeNode._finalize(@tree, @node_address, object_id))
74
- @tree.node_cache.insert(self, false)
75
- end
76
-
77
- # This method generates the destructor for the objects of this class. It
78
- # is done this way to prevent the Proc object hanging on to a reference to
79
- # self which would prevent the object from being collected. This internal
80
- # method is not intended for users to call.
81
- def BTreeNode::_finalize(tree, node_address, ruby_object_id)
82
- proc { tree.node_cache._collect(node_address, ruby_object_id) }
83
77
  end
84
78
 
85
79
  # Create a new BTreeNode. This method should be used for the creation
@@ -88,7 +82,10 @@ module PEROBS
88
82
  # @param parent [BTreeNode] The parent node
89
83
  # @param is_leaf [Boolean] True if the node has no children, false
90
84
  # otherwise
91
- def BTreeNode::create(tree, parent = nil, is_leaf = true)
85
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
86
+ # @param next_sibling [BTreeNode] reference to next sibling node
87
+ def BTreeNode::create(tree, parent = nil, is_leaf = true,
88
+ prev_sibling = nil, next_sibling = nil)
92
89
  unless parent.nil? || parent.is_a?(BTreeNode) ||
93
90
  parent.is_a?(BTreeNodeLink)
94
91
  PEROBS.log.fatal "Parent node must be a BTreeNode but is of class " +
@@ -96,17 +93,26 @@ module PEROBS
96
93
  end
97
94
 
98
95
  address = tree.nodes.free_address
99
- node = BTreeNode.new(tree, address, parent, is_leaf)
96
+ node = BTreeNode.new(tree, address, parent, is_leaf, prev_sibling,
97
+ next_sibling)
100
98
  # This is a new node. Make sure the data is written to the file.
101
99
  tree.node_cache.insert(node)
102
100
 
103
- node
101
+ # Insert the newly created node into the existing node chain.
102
+ if (node.prev_sibling = prev_sibling)
103
+ node.prev_sibling.next_sibling = BTreeNodeLink.new(tree, node)
104
+ end
105
+ if (node.next_sibling = next_sibling)
106
+ node.next_sibling.prev_sibling = BTreeNodeLink.new(tree, node)
107
+ end
108
+
109
+ BTreeNodeLink.new(tree, node)
104
110
  end
105
111
 
106
112
  # Restore a node from the backing store at the given address and tree.
107
113
  # @param tree [BTree] The tree the node belongs to
108
- # @param node_address [Integer] The address in the blob file.
109
- def BTreeNode::load(tree, address)
114
+ # @param address [Integer] The address in the blob file.
115
+ def BTreeNode::load(tree, address, unused = nil)
110
116
  unless address.is_a?(Integer)
111
117
  PEROBS.log.fatal "address is not Integer: #{address.class}"
112
118
  end
@@ -130,19 +136,21 @@ module PEROBS
130
136
  data_count = ary[2]
131
137
  # Read the parent node address
132
138
  parent = ary[3] == 0 ? nil : BTreeNodeLink.new(tree, ary[3])
139
+ prev_sibling = ary[4] == 0 ? nil : BTreeNodeLink.new(tree, ary[4])
140
+ next_sibling = ary[5] == 0 ? nil : BTreeNodeLink.new(tree, ary[5])
133
141
  # Read the keys
134
- keys = ary[4, key_count]
142
+ keys = ary[6, key_count]
135
143
 
136
144
  children = nil
137
145
  values = nil
138
146
  if is_leaf
139
147
  # Read the values
140
- values = ary[4 + tree.order, data_count]
148
+ values = ary[6 + tree.order, data_count]
141
149
  else
142
150
  # Read the child addresses
143
151
  children = []
144
152
  data_count.times do |i|
145
- child_address = ary[4 + tree.order + i]
153
+ child_address = ary[6 + tree.order + i]
146
154
  unless child_address > 0
147
155
  PEROBS.log.fatal "Child address must be larger than 0"
148
156
  end
@@ -150,17 +158,28 @@ module PEROBS
150
158
  end
151
159
  end
152
160
 
153
- node = BTreeNode.new(tree, address, parent, is_leaf, keys, values,
161
+ node = BTreeNode.new(tree, address, parent, is_leaf,
162
+ prev_sibling, next_sibling, keys, values,
154
163
  children)
155
164
  tree.node_cache.insert(node, false)
156
165
 
157
166
  node
158
167
  end
159
168
 
169
+ # This is a wrapper around BTreeNode::load() that returns a BTreeNodeLink
170
+ # instead of the actual node.
171
+ # @param tree [BTree] The tree the node belongs to
172
+ # @param address [Integer] The address in the blob file.
173
+ # @return [BTreeNodeLink] Link to the loaded node
174
+ def BTreeNode::load_and_link(tree, address)
175
+ BTreeNodeLink.new(tree, BTreeNode::load(tree, address))
176
+ end
177
+
178
+
160
179
  # @return [String] The format used for String.pack.
161
180
  def BTreeNode::node_bytes_format(tree)
162
181
  # This does not include the 4 bytes for the CRC32 checksum
163
- "CSSQQ#{tree.order}Q#{tree.order + 1}"
182
+ "CSSQQQQ#{tree.order}Q#{tree.order + 1}"
164
183
  end
165
184
 
166
185
  # @return [Integer] The number of bytes needed to store a node.
@@ -169,6 +188,8 @@ module PEROBS
169
188
  2 + # actual key count
170
189
  2 + # actual value or children count (aka data count)
171
190
  8 + # parent address
191
+ 8 + # previous sibling address
192
+ 8 + # next sibling address
172
193
  8 * order + # keys
173
194
  8 * (order + 1) + # values or child addresses
174
195
  4 # CRC32 checksum
@@ -200,8 +221,7 @@ module PEROBS
200
221
 
201
222
  # Once we have reached a leaf node we can insert or replace the value.
202
223
  if node.is_leaf
203
- node.insert_element(key, value)
204
- return
224
+ return node.insert_element(key, value)
205
225
  else
206
226
  # Descend into the right child node to add the value to.
207
227
  node = node.children[node.search_key_index(key)]
@@ -269,18 +289,19 @@ module PEROBS
269
289
  def split_node
270
290
  unless @parent
271
291
  # The node is the root node. We need to create a parent node first.
272
- self.parent = BTreeNode::create(@tree, nil, false)
292
+ self.parent = link(BTreeNode::create(@tree, nil, false))
273
293
  @parent.set_child(0, self)
274
294
  @tree.set_root(@parent)
275
295
  end
276
296
 
277
297
  # Create the new sibling that will take the 2nd half of the
278
298
  # node content.
279
- sibling = BTreeNode::create(@tree, @parent, @is_leaf)
299
+ sibling = BTreeNode::create(@tree, @parent, @is_leaf, link(self),
300
+ @next_sibling)
280
301
  # Determine the index of the middle element that gets moved to the
281
302
  # parent. The order must be an uneven number, so the integer division by
282
303
  # 2 yields the 0-based index of the middle element.
283
- mid = @tree.order / 2 + 1
304
+ mid = @tree.order / 2
284
305
  # Insert the middle element key into the parent node
285
306
  @parent.insert_element(@keys[mid], sibling)
286
307
  copy_elements(mid + (@is_leaf ? 0 : 1), sibling)
@@ -297,6 +318,9 @@ module PEROBS
297
318
  insert_element(@parent.keys[parent_index], upper_sibling.children[0])
298
319
  end
299
320
  upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size)
321
+ if (@next_sibling = link(upper_sibling.next_sibling))
322
+ @next_sibling.prev_sibling = link(self)
323
+ end
300
324
  @tree.delete_node(upper_sibling.node_address)
301
325
 
302
326
  @parent.remove_element(parent_index)
@@ -307,6 +331,7 @@ module PEROBS
307
331
  # @param key [Integer] key to address the value or child
308
332
  # @param value_or_child [Integer or BTreeNode] value or BTreeNode
309
333
  # reference
334
+ # @return true for insert, false for overwrite
310
335
  def insert_element(key, value_or_child)
311
336
  if @keys.size >= @tree.order
312
337
  PEROBS.log.fatal "Cannot insert into a full BTreeNode"
@@ -319,51 +344,48 @@ module PEROBS
319
344
  if is_leaf
320
345
  @values[i] = value_or_child
321
346
  else
322
- @children[i + 1] = BTreeNodeLink.new(@tree, value_or_child)
347
+ @children[i + 1] = link(value_or_child)
323
348
  end
349
+ @tree.node_cache.insert(self)
350
+
351
+ return false
324
352
  else
325
353
  # Create a new entry
326
354
  @keys.insert(i, key)
327
355
  if is_leaf
328
356
  @values.insert(i, value_or_child)
329
357
  else
330
- @children.insert(i + 1, BTreeNodeLink.new(@tree, value_or_child))
358
+ @children.insert(i + 1, link(value_or_child))
331
359
  end
360
+ @tree.node_cache.insert(self)
361
+
362
+ return true
332
363
  end
333
- @tree.node_cache.insert(self)
334
364
  end
335
365
 
336
366
  # Remove the element at the given index.
337
367
  def remove_element(index)
338
- # We need this key to find the link in the parent node.
339
- first_key = @keys[0]
340
- removed_value = nil
341
-
342
368
  # Delete the key at the specified index.
343
- unless @keys.delete_at(index)
344
- PEROBS.log.fatal "Could not remove element #{index} from BTreeNode " +
369
+ unless (key = @keys.delete_at(index))
370
+ PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " +
345
371
  "@#{@node_address}"
346
372
  end
347
- if @is_leaf
348
- # For leaf nodes, also delete the corresponding value.
349
- removed_value = @values.delete_at(index)
350
- else
351
- # The corresponding child has can be found at 1 index higher.
352
- @children.delete_at(index + 1)
353
- end
354
- @tree.node_cache.insert(self)
373
+ update_branch_key(key) if index == 0
355
374
 
356
- # Find the lower and upper siblings and the index of the key for this
357
- # node in the parent node.
358
- lower_sibling, upper_sibling, parent_index =
359
- find_closest_siblings(first_key)
375
+ # Delete the corresponding value.
376
+ removed_value = @values.delete_at(index)
377
+ @tree.node_cache.insert(self)
360
378
 
361
- if lower_sibling &&
362
- lower_sibling.keys.size + @keys.size < @tree.order
363
- lower_sibling.merge_node(self, parent_index - 1)
364
- elsif upper_sibling &&
365
- @keys.size + upper_sibling.keys.size < @tree.order
366
- merge_node(upper_sibling, parent_index)
379
+ if @keys.length < min_keys
380
+ if @prev_sibling && @prev_sibling.parent == @parent
381
+ borrow_from_previous_sibling(@prev_sibling) ||
382
+ @prev_sibling.merge_with_leaf_node(self)
383
+ elsif @next_sibling && @next_sibling.parent == @parent
384
+ borrow_from_next_sibling(@next_sibling) ||
385
+ merge_with_leaf_node(@next_sibling)
386
+ elsif @parent
387
+ PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
388
+ end
367
389
  end
368
390
 
369
391
  # The merge has potentially invalidated this node. After this method has
@@ -371,7 +393,88 @@ module PEROBS
371
393
  removed_value
372
394
  end
373
395
 
396
+ def remove_child(node)
397
+ unless (index = search_node_index(node))
398
+ PEROBS.log.fatal "Cannot remove child #{node.node_address} " +
399
+ "from node #{@node_address}"
400
+ end
401
+
402
+ @tree.node_cache.insert(self)
403
+ if index == 0
404
+ # Removing the first child is a bit more complicated as the
405
+ # corresponding branch key is in a parent node.
406
+ key = @keys.shift
407
+ update_branch_key(key)
408
+ else
409
+ # For all other children we can just remove the corresponding key.
410
+ @keys.delete_at(index - 1)
411
+ end
412
+
413
+ # Remove the child node link.
414
+ child = @children.delete_at(index)
415
+ # Unlink the neighbouring siblings from the child
416
+ child.prev_sibling.next_sibling = child.next_sibling if child.prev_sibling
417
+ child.next_sibling.prev_sibling = child.prev_sibling if child.next_sibling
418
+
419
+ if @keys.length < min_keys
420
+ # The node has become too small. Try borrowing a node from an adjacent
421
+ # sibling or merge with an adjacent node.
422
+ if @prev_sibling && @prev_sibling.parent == @parent
423
+ borrow_from_previous_sibling(@prev_sibling) ||
424
+ @prev_sibling.merge_with_branch_node(self)
425
+ elsif @next_sibling && @next_sibling.parent == @parent
426
+ borrow_from_next_sibling(@next_sibling) ||
427
+ merge_with_branch_node(@next_sibling)
428
+ end
429
+ end
430
+
431
+ if @parent.nil? && @children.length == 1
432
+ # If the node just below the root only has one child it will become
433
+ # the new root node.
434
+ new_root = @children.first
435
+ new_root.parent = nil
436
+ @tree.set_root(new_root)
437
+ end
438
+ end
439
+
440
+ def merge_with_leaf_node(node)
441
+ if @keys.length + node.keys.length > @tree.order
442
+ PEROBS.log.fatal "Leaf nodes are too big to merge"
443
+ end
444
+
445
+ @keys += node.keys
446
+ @values += node.values
447
+ @tree.node_cache.insert(self)
448
+
449
+ node.parent.remove_child(node)
450
+ end
451
+
452
+ def merge_with_branch_node(node)
453
+ if @keys.length + 1 + node.keys.length > @tree.order
454
+ PEROBS.log.fatal "Branch nodes are too big to merge"
455
+ end
456
+
457
+ index = @parent.search_node_index(node) - 1
458
+ @keys << @parent.keys[index]
459
+ @keys += node.keys
460
+ node.children.each { |c| c.parent = link(self) }
461
+ @children += node.children
462
+ @tree.node_cache.insert(self)
463
+
464
+ node.parent.remove_child(node)
465
+ end
466
+
467
+ def search_node_index(node)
468
+ index = search_key_index(node.keys.first)
469
+ unless @children[index] == node
470
+ raise RuntimeError, "Child at index #{index} is not the requested node"
471
+ end
472
+
473
+ index
474
+ end
475
+
374
476
  def copy_elements(src_idx, dest_node, dst_idx = 0, count = nil)
477
+ dest_node = dest_node.get_node
375
478
  unless count
376
479
  count = @tree.order - src_idx
377
480
  end
@@ -399,18 +502,47 @@ module PEROBS
399
502
  end
400
503
 
401
504
  def parent=(p)
402
- @parent = p ? BTreeNodeLink.new(@tree, p) : nil
505
+ @parent = p
506
+ @tree.node_cache.insert(self)
507
+
508
+ p
509
+ end
510
+
511
+ def prev_sibling=(node)
512
+ @prev_sibling = node
513
+ if node.nil? && @is_leaf
514
+ # If this node is a leaf node without a previous sibling we need to
515
+ # register it as the first leaf node.
516
+ @tree.set_first_leaf(BTreeNodeLink.new(@tree, self))
517
+ end
518
+
403
519
  @tree.node_cache.insert(self)
520
+
521
+ node
522
+ end
523
+
524
+ def next_sibling=(node)
525
+ @next_sibling = node
526
+ @tree.node_cache.insert(self)
527
+ if node.nil? && @is_leaf
528
+ # If this node is a leaf node without a next sibling we need to
529
+ # register it as the last leaf node.
530
+ @tree.set_last_leaf(BTreeNodeLink.new(@tree, self))
531
+ end
532
+
533
+ node
404
534
  end
405
535
 
406
536
  def set_child(index, child)
407
537
  if child
408
- @children[index] = BTreeNodeLink.new(@tree, child)
409
- @children[index].parent = self
538
+ @children[index] = link(child)
539
+ @children[index].parent = link(self)
410
540
  else
411
541
  @children[index] = nil
412
542
  end
413
543
  @tree.node_cache.insert(self)
544
+
545
+ child
414
546
  end
415
547
 
416
548
  def trim(idx)
@@ -511,12 +643,20 @@ module PEROBS
511
643
  # @yield [key, value]
512
644
  # @return [Boolean] true if tree has no errors
513
645
  def check
646
+ branch_depth = nil
647
+
514
648
  traverse do |node, position, stack|
515
649
  if position == 0
516
- if node.parent && node.keys.size < 1
517
- node.error "BTreeNode must have at least one entry"
518
- return false
650
+ if node.parent
651
+ # After a split the nodes will only have half the maximum keys.
652
+ # For branch nodes one of the split nodes will have even 1 key
653
+ # less as this will become the branch key in a parent node.
654
+ if node.keys.size < min_keys - (node.is_leaf ? 0 : 1)
655
+ node.error "BTreeNode #{node.node_address} has too few keys"
656
+ return false
657
+ end
519
658
  end
659
+
520
660
  if node.keys.size > @tree.order
521
661
  node.error "BTreeNode must not have more then #{@tree.order} " +
522
662
  "keys, but has #{node.keys.size} keys"
@@ -529,16 +669,43 @@ module PEROBS
529
669
  "#{node.keys.inspect}"
530
670
  return false
531
671
  end
672
+ last_key = key
532
673
  end
533
674
 
534
675
  if node.is_leaf
676
+ if branch_depth
677
+ unless branch_depth == stack.size
678
+ node.error "All leaf nodes must have same distance from root "
679
+ return false
680
+ end
681
+ else
682
+ branch_depth = stack.size
683
+ end
684
+ if node.prev_sibling.nil? && @tree.first_leaf != node
685
+ node.error "Leaf node #{node.node_address} has no previous " +
686
+ "sibling but is not the first leaf of the tree"
687
+ return false
688
+ end
689
+ if node.next_sibling.nil? && @tree.last_leaf != node
690
+ node.error "Leaf node #{node.node_address} has no next sibling " +
691
+ "but is not the last leaf of the tree"
692
+ return false
693
+ end
535
694
  unless node.keys.size == node.values.size
536
695
  node.error "Key count (#{node.keys.size}) and value " +
537
696
  "count (#{node.values.size}) don't match"
538
697
  return false
539
698
  end
699
+ unless node.children.empty?
700
+ node.error "@children must be nil for a leaf node"
701
+ return false
702
+ end
540
703
  else
541
- unless node.keys.size == node.children.size - 1
704
+ unless node.values.empty?
705
+ node.error "@values must be nil for a branch node"
706
+ return false
707
+ end
708
+ unless node.children.size == node.keys.size + 1
542
709
  node.error "Key count (#{node.keys.size}) must be one " +
543
710
  "less than children count (#{node.children.size})"
544
711
  return false
@@ -551,10 +718,10 @@ module PEROBS
551
718
  end
552
719
  unless child.parent.is_a?(BTreeNodeLink)
553
720
  node.error "Parent reference of child #{i} is of class " +
554
- "#{child.class} instead of BTreeNodeLink"
721
+ "#{child.parent.class} instead of BTreeNodeLink"
555
722
  return false
556
723
  end
557
- if child.node_address == node.node_address
724
+ if child == node
558
725
  node.error "Child #{i} points to self"
559
726
  return false
560
727
  end
@@ -567,6 +734,22 @@ module PEROBS
567
734
  "to this node"
568
735
  return false
569
736
  end
737
+ if i > 0
738
+ unless node.children[i - 1].next_sibling == child
739
+ node.error "next_sibling of node " +
740
+ "#{node.children[i - 1].node_address} " +
741
+ "must point to node #{child.node_address}"
742
+ return false
743
+ end
744
+ end
745
+ if i < node.children.length - 1
746
+ unless child == node.children[i + 1].prev_sibling
747
+ node.error "prev_sibling of node " +
748
+ "#{node.children[i + 1].node_address} " +
749
+ "must point to node #{child.node_address}"
750
+ return false
751
+ end
752
+ end
570
753
  end
571
754
  end
572
755
  elsif position <= node.keys.size
@@ -580,8 +763,7 @@ module PEROBS
580
763
  "Must be smaller than #{node.keys[index]}."
581
764
  return false
582
765
  end
583
- unless node.children[position].keys.first >=
584
- node.keys[index]
766
+ unless node.children[position].keys.first >= node.keys[index]
585
767
  node.error "Child #{node.children[position].node_address} " +
586
768
  "has too small key #{node.children[position].keys.first}. " +
587
769
  "Must be larger than or equal to #{node.keys[index]}."
@@ -671,13 +853,26 @@ module PEROBS
671
853
  s += ' ^@'
672
854
  end
673
855
  end
856
+ if @prev_sibling
857
+ begin
858
+ s += " <#{@prev_sibling.node_address}"
859
+ rescue
860
+ s += ' <@'
861
+ end
862
+ end
863
+ if @next_sibling
864
+ begin
865
+ s += " >#{@next_sibling.node_address}"
866
+ rescue
867
+ s += ' >@'
868
+ end
869
+ end
674
870
 
675
871
  s
676
872
  end
677
873
 
678
874
  def error(msg)
679
- PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}\n" +
680
- @tree.to_s
875
+ PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}"
681
876
  end
682
877
 
683
878
  def write_node
@@ -685,7 +880,9 @@ module PEROBS
685
880
  @is_leaf ? 1 : 0,
686
881
  @keys.size,
687
882
  @is_leaf ? @values.size : @children.size,
688
- @parent ? @parent.node_address : 0
883
+ @parent ? @parent.node_address : 0,
884
+ @prev_sibling ? @prev_sibling.node_address : 0,
885
+ @next_sibling ? @next_sibling.node_address : 0
689
886
  ] + @keys + ::Array.new(@tree.order - @keys.size, 0)
690
887
 
691
888
  if @is_leaf
@@ -708,24 +905,107 @@ module PEROBS
708
905
 
709
906
  private
710
907
 
711
- def find_closest_siblings(key)
712
- # The root node has no siblings.
713
- return [ nil, nil, nil ] unless @parent
908
# @return half of the order of the tree (rounded down for Integer orders).
#         The borrow methods and the consistency check use this value as
#         the lower bound for the key count of a node.
def min_keys
  order = @tree.order
  order / 2
end
911
+
912
# Turn a node reference into a BTreeNodeLink.
# @param node nil, a BTreeNodeLink, a BTreeNode or an Integer
# @return nil for nil, the link itself for a BTreeNodeLink, or a new
#         BTreeNodeLink for a BTreeNode or Integer. Any other argument is
#         reported via PEROBS.log.fatal.
def link(node)
  if node.nil?
    nil
  elsif node.is_a?(BTreeNodeLink)
    # Already a link, nothing to wrap.
    node
  elsif node.is_a?(BTreeNode) || node.is_a?(Integer)
    BTreeNodeLink.new(@tree, node)
  else
    PEROBS.log.fatal "Node link must be a BTreeNode, not a #{node.class}"
  end
end
923
+
924
# Try to move one element from the preceding sibling into this node.
# Nothing is moved unless the sibling holds more than min_keys + 1 keys.
# @param prev_node the preceding sibling node
# @return [True or False] true if an element was borrowed, false
#         otherwise.
def borrow_from_previous_sibling(prev_node)
  return false unless prev_node.keys.length - 1 > min_keys

  # Position of the branch key in the parent that separates prev_node from
  # this node.
  branch_idx = @parent.search_node_index(self) - 1

  # All three nodes get modified below.
  @tree.node_cache.insert(self)
  @tree.node_cache.insert(prev_node.get_node)
  @tree.node_cache.insert(@parent.get_node)
  if @is_leaf
    # The last key of the previous node becomes the first key of this node
    @keys.unshift(prev_node.keys.pop)
    # and the parent must be told about the new lead key of this node.
    @parent.keys[branch_idx] = @keys.first
    # The value travels along with its key.
    @values.unshift(prev_node.values.pop)
  else
    # For branch nodes the current branch key is the key that gets
    # borrowed.
    @keys.unshift(@parent.keys[branch_idx])
    # The last key of the previous node becomes the new branch key for
    # this node.
    @parent.keys[branch_idx] = prev_node.keys.pop
    # The last child of the previous node becomes the first child of this
    # node.
    moved_child = prev_node.children.pop
    @children.unshift(moved_child)
    moved_child.parent = link(self)
  end

  true
end
957
+
958
# Try to move one element from the following sibling into this node.
# Nothing is moved unless the sibling holds more than min_keys + 1 keys.
# @param next_node the following sibling node
# @return [True or False] true if an element was borrowed, false
#         otherwise.
def borrow_from_next_sibling(next_node)
  return false unless next_node.keys.length - 1 > min_keys

  # The next sibling will get a new lead key. Determine the position of
  # the branch key in its parent that needs to be updated.
  branch_idx = next_node.parent.search_node_index(next_node) - 1

  # All three nodes get modified below.
  @tree.node_cache.insert(self)
  @tree.node_cache.insert(next_node.get_node)
  @tree.node_cache.insert(next_node.parent.get_node)
  if @is_leaf
    # The first key of the next node becomes the last key of this node
    @keys << next_node.keys.shift
    # and the parent of next_node must be told about the new lead key.
    next_node.parent.keys[branch_idx] = next_node.keys.first
    # The value travels along with its key.
    @values << next_node.values.shift
  else
    # For branch nodes the borrowed key is the branch key stored in the
    # parent of next_node.
    @keys << next_node.parent.keys[branch_idx]
    # The old lead key of next_node is removed from next_node and becomes
    # the new branch key in its parent.
    next_node.parent.keys[branch_idx] = next_node.keys.shift
    # The first child of the next node becomes the last child of this
    # node.
    moved_child = next_node.children.shift
    @children << moved_child
    moved_child.parent = link(self)
  end

  true
end
994
+
995
# Replace the branch key old_key with the current first key of this node.
# The ancestors are searched from the parent upwards and only the first
# occurrence of old_key is replaced.
# @param old_key the key that was the lead key of this node before
# @return [nil]
def update_branch_key(old_key)
  new_key = @keys.first
  ancestor = @parent

  while ancestor
    idx = ancestor.keys.index(old_key)
    if idx
      ancestor.keys[idx] = new_key
      @tree.node_cache.insert(ancestor.get_node)
      return
    end
    ancestor = ancestor.parent
  end

  # No ancestor holds old_key. The smallest element has no branch key.
  nil
end
730
1010
 
731
1011
  end