perobs 4.0.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a988f009b1b97bc8da2b2bcfeb65399284a07032
4
- data.tar.gz: 4346d5cdbec5b4154741f0a41d3f526985803912
3
+ metadata.gz: 4da7810046e0984b00152ab2205ebb2e2d364b94
4
+ data.tar.gz: 74c48af35b674713b222d0dff1b7f0c3ef070b45
5
5
  SHA512:
6
- metadata.gz: 639673c10faa5082c742258ee9848f5e951c6d1ec7fc9f03ec7066260ea5042b6c8b90020df6fa506b8be54cc5fd9793a0939f80ef1da726c27832c8a2eb8c85
7
- data.tar.gz: 92e63145a1fc9b6e76dbdf6f4cdebfb5a8935bfcbd93a9634e22c12f874b945b656a17fe93aa03a7a3189a00371d2e2abd8d411862ad0ad51b0c364f7b7dc027
6
+ metadata.gz: 5746476c388361dee6ac06094e7545e86ecb0569368c089e21429e4e6c3dbf92d8db5e7854249d7214c162f15a629306092dd0ca8bfbceb85eda7c08df95cf90
7
+ data.tar.gz: a495a6127f26bdc7326f1bd7ea3671ff9add499d0b455a9334c884c23a2960ee963eedb8d2bcb5c3f257af9dde4895bebd458a96e7c079dbd0752028e9200614
@@ -27,3 +27,4 @@
27
27
 
28
28
  require "perobs/version"
29
29
  require 'perobs/Store'
30
+ require 'perobs/ConsoleProgressMeter'
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Array.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -44,19 +44,27 @@ module PEROBS
44
44
 
45
45
  attr_reader :data
46
46
 
47
- # These methods do not mutate the Array. They only perform read
48
- # operations.
47
+ # These methods do not mutate the Array but create a new PEROBS::Array
48
+ # object. They only perform read operations.
49
49
  ([
50
- :&, :*, :+, :-, :==, :[], :<=>, :at, :abbrev, :assoc, :bsearch, :collect,
51
- :combination, :compact, :count, :cycle, :dclone, :drop, :drop_while,
50
+ :|, :&, :+, :-, :collect, :compact, :drop, :drop_while,
51
+ :flatten, :map, :reject, :reverse, :rotate, :select, :shuffle, :slice,
52
+ :sort, :take, :take_while, :uniq, :values_at
53
+ ] + Enumerable.instance_methods).uniq.each do |method_sym|
54
+ define_method(method_sym) do |*args, &block|
55
+ @store.cache.cache_read(self)
56
+ @store.new(PEROBS::Array, @data.send(method_sym, *args, &block))
57
+ end
58
+ end
59
+
60
+ # These methods do not mutate the Array and only perform read operations.
61
+ # They do not return basic object types.
62
+ ([
63
+ :==, :[], :<=>, :at, :bsearch, :bsearch_index, :count, :cycle,
52
64
  :each, :each_index, :empty?, :eql?, :fetch, :find_index, :first,
53
- :flatten, :frozen?, :hash, :include?, :index, :join, :last,
54
- :length, :map, :pack, :permutation, :pretty_print, :pretty_print_cycle,
55
- :product, :rassoc, :reject, :repeated_combination,
56
- :repeated_permutation, :reverse, :reverse_each, :rindex, :rotate,
57
- :sample, :select, :shelljoin, :shuffle, :size, :slice, :sort, :take,
58
- :take_while, :to_a, :to_ary, :to_s, :transpose, :uniq, :values_at, :zip,
59
- :|
65
+ :frozen?, :include?, :index, :join, :last, :length, :pack,
66
+ :pretty_print, :pretty_print_cycle, :reverse_each, :rindex, :sample,
67
+ :size, :to_a, :to_ary, :to_s
60
68
  ] + Enumerable.instance_methods).uniq.each do |method_sym|
61
69
  define_method(method_sym) do |*args, &block|
62
70
  @store.cache.cache_read(self)
@@ -64,12 +72,23 @@ module PEROBS
64
72
  end
65
73
  end
66
74
 
75
+ # These methods mutate the Array and return self.
76
+ [
77
+ :<<, :clear, :collect!, :compact!, :concat,
78
+ :fill, :flatten!, :insert, :keep_if, :map!, :push,
79
+ :reject!, :replace, :select!, :reverse!, :rotate!, :shuffle!,
80
+ :slice!, :sort!, :sort_by!, :uniq!
81
+ ].each do |method_sym|
82
+ define_method(method_sym) do |*args, &block|
83
+ @store.cache.cache_write(self)
84
+ @data.send(method_sym, *args, &block)
85
+ myself
86
+ end
87
+ end
88
+
67
89
  # These methods mutate the Array.
68
90
  [
69
- :<<, :[]=, :clear, :collect!, :compact!, :concat, :delete, :delete_at,
70
- :delete_if, :fill, :flatten!, :insert, :keep_if, :map!, :pop, :push,
71
- :reject!, :replace, :select!, :reverse!, :rotate!, :shift, :shuffle!,
72
- :slice!, :sort!, :sort_by!, :uniq!, :unshift
91
+ :delete, :delete_at, :delete_if, :shift, :pop
73
92
  ].each do |method_sym|
74
93
  define_method(method_sym) do |*args, &block|
75
94
  @store.cache.cache_write(self)
@@ -81,17 +100,45 @@ module PEROBS
81
100
  # PEROBS users should never call this method or equivalents of derived
82
101
  # methods directly.
83
102
  # @param p [PEROBS::Handle] PEROBS handle
84
- # @param size [Integer] The requested size of the Array
103
+ # @param arg1 [Integer or Array] The requested size of the Array or an
104
+ # Array to initialize
85
105
  # @param default [Any] The default value that is returned when no value is
86
106
  # stored for a specific key.
87
- def initialize(p, size = 0, default = nil)
107
+ def initialize(p, arg1 = 0, default = nil, &block)
88
108
  super(p)
89
- @data = ::Array.new(size, default)
109
+ if arg1.is_a?(::Array)
110
+ arg1.each { |v| _check_assignment_value(v) }
111
+ @data = arg1.dup
112
+ elsif block_given?
113
+ @data = ::Array.new(arg1) do
114
+ _check_assignment_value(yield)
115
+ end
116
+ else
117
+ @data = ::Array.new(arg1, _check_assignment_value(default))
118
+ end
90
119
 
91
120
  # Ensure that the newly created object will be pushed into the database.
92
121
  @store.cache.cache_write(self)
93
122
  end
94
123
 
124
+ # Proxy for the assignment method.
125
+ def []=(*args)
126
+ if (args.length == 2)
127
+ _check_assignment_value(args[1])
128
+ else
129
+ _check_assignment_value(args[2])
130
+ end
131
+ @store.cache.cache_write(self)
132
+ @data.[]=(*args)
133
+ end
134
+
135
+ # Proxy for the unshift method.
136
+ def unshift(val)
137
+ _check_assignment_value(val)
138
+ @store.cache.cache_write(self)
139
+ @data.unshift(val)
140
+ end
141
+
95
142
  # Return a list of all object IDs of all persistent objects that this Array
96
143
  # is referencing.
97
144
  # @return [Array of Integer] IDs of referenced objects
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = BTreeNode.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -32,7 +32,7 @@ require 'perobs/BTreeNode'
32
32
 
33
33
  module PEROBS
34
34
 
35
- # This BTree class is very similar to a classic BTree implementation. It
35
+ # This BTree class is very similar to a classic B+Tree implementation. It
36
36
  # manages a tree that is always balanced. The BTree is stored in the
37
37
  # specified directory and partially kept in memory to speed up operations.
38
38
  # The order of the tree specifies how many keys each node will be able to
@@ -40,16 +40,19 @@ module PEROBS
40
40
  # have N + 1 references to child nodes instead.
41
41
  class BTree
42
42
 
43
- attr_reader :order, :nodes, :node_cache
43
+ attr_reader :order, :nodes, :node_cache, :first_leaf, :last_leaf, :size
44
44
 
45
45
  # Create a new BTree object.
46
46
  # @param dir [String] Directory to store the tree file
47
47
  # @param name [String] Base name of the BTree related files in 'dir'
48
48
  # @param order [Integer] The maximum number of keys per node. This number
49
49
  # must be odd and larger than 2 and smaller than 2**16 - 1.
50
- def initialize(dir, name, order)
50
+ # @param progressmeter [ProgressMeter] reference to a ProgressMeter object
51
+ def initialize(dir, name, order, progressmeter)
51
52
  @dir = dir
52
53
  @name = name
54
+ @progressmeter = progressmeter
55
+
53
56
  unless order > 2
54
57
  PEROBS.log.fatal "BTree order must be larger than 2, not #{order}"
55
58
  end
@@ -62,9 +65,14 @@ module PEROBS
62
65
  @order = order
63
66
 
64
67
  # This EquiBlobsFile contains the nodes of the BTree.
65
- @nodes = EquiBlobsFile.new(@dir, @name,
68
+ @nodes = EquiBlobsFile.new(@dir, @name, @progressmeter,
66
69
  BTreeNode::node_bytes(@order))
67
- @node_cache = PersistentObjectCache.new(512, BTreeNode, self)
70
+ @nodes.register_custom_data('first_leaf')
71
+ @nodes.register_custom_data('last_leaf')
72
+ @nodes.register_custom_data('btree_size')
73
+ @node_cache = PersistentObjectCache.new(16384, 5000, BTreeNode, self)
74
+ @root = @first_leaf = @last_leaf = nil
75
+ @size = 0
68
76
 
69
77
  # This BTree implementation uses a write cache to improve write
70
78
  # performance of multiple successive read/write operations. This also
@@ -89,23 +97,46 @@ module PEROBS
89
97
 
90
98
  @node_cache.clear
91
99
  @nodes.open
92
- node = @nodes.total_entries == 0 ?
93
- BTreeNode::create(self) :
94
- BTreeNode::load(self, @nodes.first_entry)
100
+
101
+ if @nodes.total_entries == 0
102
+ # We've created a new nodes file
103
+ node = BTreeNode::create(self)
104
+ else
105
+ # We are loading an existing tree.
106
+ node = BTreeNode::load_and_link(self, @nodes.first_entry)
107
+ @first_leaf = BTreeNode::load_and_link(
108
+ self, @nodes.get_custom_data('first_leaf'))
109
+ @last_leaf = BTreeNode::load_and_link(
110
+ self, @nodes.get_custom_data('last_leaf'))
111
+ end
95
112
  set_root(node)
113
+
114
+ # Get the total number of entries that are stored in the tree.
115
+ @size = @nodes.get_custom_data('btree_size')
96
116
  end
97
117
 
98
118
  # Close the tree file.
99
119
  def close
120
+
121
+ def val_perc(value, total)
122
+ "#{value} (#{(value.to_f / total*100.0).to_i}%)"
123
+ end
124
+
100
125
  sync
101
126
  @nodes.close
102
127
  @root = nil
103
128
  end
104
129
 
130
+ # @return true if file is currently open
131
+ def is_open?
132
+ !@root.nil?
133
+ end
134
+
105
135
  # Clear all pools and forget any registered spaces.
106
136
  def clear
107
137
  @node_cache.clear
108
138
  @nodes.clear
139
+ @size = 0
109
140
  set_root(BTreeNode::create(self))
110
141
  end
111
142
 
@@ -114,6 +145,7 @@ module PEROBS
114
145
  # all stored data from the BTree.
115
146
  def erase
116
147
  @nodes.erase
148
+ @size = 0
117
149
  @root = nil
118
150
  @dirty_flag.forced_unlock
119
151
  end
@@ -121,6 +153,7 @@ module PEROBS
121
153
  # Flush all pending modifications into the tree file.
122
154
  def sync
123
155
  @node_cache.flush(true)
156
+ @nodes.set_custom_data('btree_size', @size)
124
157
  @nodes.sync
125
158
  @dirty_flag.unlock if @dirty_flag.is_locked?
126
159
  end
@@ -128,21 +161,54 @@ module PEROBS
128
161
  # Check if the tree file contains any errors.
129
162
  # @return [Boolean] true if no errors were found, false otherwise
130
163
  def check(&block)
131
- @root.check(&block)
164
+ sync
165
+ return false unless @nodes.check
166
+
167
+ entries = 0
168
+ res = true
169
+ @progressmeter.start('Checking index structure', @size) do |pm|
170
+ res = @root.check do |k, v|
171
+ pm.update(entries += 1)
172
+ block_given? ? yield(k, v) : true
173
+ end
174
+ end
175
+
176
+ unless entries == @size
177
+ PEROBS.log.error "The BTree size (#{@size}) and the number of " +
178
+ "found entries (#{entries}) don't match"
179
+ return false
180
+ end
181
+
182
+ res
132
183
  end
133
184
 
134
185
  # Register a new node as root node of the tree.
186
+ # @param node [BTreeNode]
135
187
  def set_root(node)
136
188
  @root = node
137
189
  @nodes.first_entry = node.node_address
138
190
  end
139
191
 
192
+ # Set the address of the first leaf node.
193
+ # @param node [BTreeNode]
194
+ def set_first_leaf(node)
195
+ @first_leaf = node
196
+ @nodes.set_custom_data('first_leaf', node.node_address)
197
+ end
198
+
199
+ # Set the address of the last leaf node.
200
+ # @param node [BTreeNode]
201
+ def set_last_leaf(node)
202
+ @last_leaf = node
203
+ @nodes.set_custom_data('last_leaf', node.node_address)
204
+ end
205
+
140
206
  # Insert a new value into the tree using the key as a unique index. If the
141
207
  # key already exists the old value will be overwritten.
142
208
  # @param key [Integer] Unique key
143
209
  # @param value [Integer] value
144
210
  def insert(key, value)
145
- @root.insert(key, value)
211
+ @size += 1 if @root.insert(key, value)
146
212
  @node_cache.flush
147
213
  end
148
214
 
@@ -157,7 +223,7 @@ module PEROBS
157
223
  # Find and remove the value associated with the given key. If no entry was
158
224
  # found, return nil, otherwise the found value.
159
225
  def remove(key)
160
- removed_value = @root.remove(key)
226
+ @size -= 1 unless (removed_value = @root.remove(key)).nil?
161
227
 
162
228
  # Check if the root node only contains one child link after the delete
163
229
  # operation. Then we can delete that node and pull the tree one level
@@ -187,6 +253,11 @@ module PEROBS
187
253
  @nodes.delete_blob(address)
188
254
  end
189
255
 
256
+ # @return [Integer] The number of entries stored in the tree.
257
+ def entries_count
258
+ @size
259
+ end
260
+
190
261
  # @return [String] Human readable form of the tree.
191
262
  def to_s
192
263
  @root.to_s
@@ -33,7 +33,7 @@ require 'perobs/RobustFile'
33
33
  module PEROBS
34
34
 
35
35
  # This class manages the usage of the data blobs in the corresponding
36
- # HashedBlobsDB object.
36
+ # BTreeDB object.
37
37
  class BTreeBlob
38
38
 
39
39
  # Magic number used for index files.
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = BTreeDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -58,7 +58,7 @@ module PEROBS
58
58
  # nodes. The insert/find/delete time grows
59
59
  # linearly with the size.
60
60
  def initialize(db_name, options = {})
61
- super(options[:serializer] || :json)
61
+ super(options)
62
62
 
63
63
  @db_dir = db_name
64
64
  # Create the database directory if it doesn't exist yet.
@@ -39,7 +39,8 @@ module PEROBS
39
39
  # mark a node as leaf or branch node.
40
40
  class BTreeNode
41
41
 
42
- attr_reader :node_address, :parent, :is_leaf, :keys, :values, :children
42
+ attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling,
43
+ :keys, :values, :children
43
44
 
44
45
  # Create a new BTreeNode object for the given tree with the given parent
45
46
  # or recreate the node with the given node_address from the backing store.
@@ -48,18 +49,23 @@ module PEROBS
48
49
  # restore the node.
49
50
  # @param tree [BTree] The tree this node is part of
50
51
  # @param parent [BTreeNode] reference to parent node
52
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
53
+ # @param next_sibling [BTreeNode] reference to next sibling node
51
54
  # @param node_address [Integer] the address of the node to read from the
52
55
  # backing store
53
56
  # @param is_leaf [Boolean] true if the node should be a leaf node, false
54
57
  # if not
55
58
  def initialize(tree, node_address = nil, parent = nil, is_leaf = true,
59
+ prev_sibling = nil, next_sibling = nil,
56
60
  keys = [], values = [], children = [])
57
61
  @tree = tree
58
62
  if node_address == 0
59
63
  PEROBS.log.fatal "Node address may not be 0"
60
64
  end
61
65
  @node_address = node_address
62
- @parent = parent ? BTreeNodeLink.new(tree, parent) : nil
66
+ @parent = link(parent)
67
+ @prev_sibling = link(prev_sibling)
68
+ @next_sibling = link(next_sibling)
63
69
  @keys = keys
64
70
  if (@is_leaf = is_leaf)
65
71
  @values = values
@@ -68,18 +74,6 @@ module PEROBS
68
74
  @children = children
69
75
  @values = []
70
76
  end
71
-
72
- ObjectSpace.define_finalizer(
73
- self, BTreeNode._finalize(@tree, @node_address, object_id))
74
- @tree.node_cache.insert(self, false)
75
- end
76
-
77
- # This method generates the destructor for the objects of this class. It
78
- # is done this way to prevent the Proc object hanging on to a reference to
79
- # self which would prevent the object from being collected. This internal
80
- # method is not intended for users to call.
81
- def BTreeNode::_finalize(tree, node_address, ruby_object_id)
82
- proc { tree.node_cache._collect(node_address, ruby_object_id) }
83
77
  end
84
78
 
85
79
  # Create a new BTreeNode. This method should be used for the creation
@@ -88,7 +82,10 @@ module PEROBS
88
82
  # @param parent [BTreeNode] The parent node
89
83
  # @param is_leaf [Boolean] True if the node has no children, false
90
84
  # otherwise
91
- def BTreeNode::create(tree, parent = nil, is_leaf = true)
85
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
86
+ # @param next_sibling [BTreeNode] reference to next sibling node
87
+ def BTreeNode::create(tree, parent = nil, is_leaf = true,
88
+ prev_sibling = nil, next_sibling = nil)
92
89
  unless parent.nil? || parent.is_a?(BTreeNode) ||
93
90
  parent.is_a?(BTreeNodeLink)
94
91
  PEROBS.log.fatal "Parent node must be a BTreeNode but is of class " +
@@ -96,17 +93,26 @@ module PEROBS
96
93
  end
97
94
 
98
95
  address = tree.nodes.free_address
99
- node = BTreeNode.new(tree, address, parent, is_leaf)
96
+ node = BTreeNode.new(tree, address, parent, is_leaf, prev_sibling,
97
+ next_sibling)
100
98
  # This is a new node. Make sure the data is written to the file.
101
99
  tree.node_cache.insert(node)
102
100
 
103
- node
101
+ # Insert the newly created node into the existing node chain.
102
+ if (node.prev_sibling = prev_sibling)
103
+ node.prev_sibling.next_sibling = BTreeNodeLink.new(tree, node)
104
+ end
105
+ if (node.next_sibling = next_sibling)
106
+ node.next_sibling.prev_sibling = BTreeNodeLink.new(tree, node)
107
+ end
108
+
109
+ BTreeNodeLink.new(tree, node)
104
110
  end
105
111
 
106
112
  # Restore a node from the backing store at the given address and tree.
107
113
  # @param tree [BTree] The tree the node belongs to
108
- # @param node_address [Integer] The address in the blob file.
109
- def BTreeNode::load(tree, address)
114
+ # @param address [Integer] The address in the blob file.
115
+ def BTreeNode::load(tree, address, unused = nil)
110
116
  unless address.is_a?(Integer)
111
117
  PEROBS.log.fatal "address is not Integer: #{address.class}"
112
118
  end
@@ -130,19 +136,21 @@ module PEROBS
130
136
  data_count = ary[2]
131
137
  # Read the parent node address
132
138
  parent = ary[3] == 0 ? nil : BTreeNodeLink.new(tree, ary[3])
139
+ prev_sibling = ary[4] == 0 ? nil : BTreeNodeLink.new(tree, ary[4])
140
+ next_sibling = ary[5] == 0 ? nil : BTreeNodeLink.new(tree, ary[5])
133
141
  # Read the keys
134
- keys = ary[4, key_count]
142
+ keys = ary[6, key_count]
135
143
 
136
144
  children = nil
137
145
  values = nil
138
146
  if is_leaf
139
147
  # Read the values
140
- values = ary[4 + tree.order, data_count]
148
+ values = ary[6 + tree.order, data_count]
141
149
  else
142
150
  # Read the child addresses
143
151
  children = []
144
152
  data_count.times do |i|
145
- child_address = ary[4 + tree.order + i]
153
+ child_address = ary[6 + tree.order + i]
146
154
  unless child_address > 0
147
155
  PEROBS.log.fatal "Child address must be larger than 0"
148
156
  end
@@ -150,17 +158,28 @@ module PEROBS
150
158
  end
151
159
  end
152
160
 
153
- node = BTreeNode.new(tree, address, parent, is_leaf, keys, values,
161
+ node = BTreeNode.new(tree, address, parent, is_leaf,
162
+ prev_sibling, next_sibling, keys, values,
154
163
  children)
155
164
  tree.node_cache.insert(node, false)
156
165
 
157
166
  node
158
167
  end
159
168
 
169
+ # This is a wrapper around BTreeNode::load() that returns a BTreeNodeLink
170
+ # instead of the actual node.
171
+ # @param tree [BTree] The tree the node belongs to
172
+ # @param address [Integer] The address in the blob file.
173
+ # @return [BTreeNodeLink] Link to the loaded node
174
+ def BTreeNode::load_and_link(tree, address)
175
+ BTreeNodeLink.new(tree, BTreeNode::load(tree, address))
176
+ end
177
+
178
+
160
179
  # @return [String] The format used for String.pack.
161
180
  def BTreeNode::node_bytes_format(tree)
162
181
  # This does not include the 4 bytes for the CRC32 checksum
163
- "CSSQQ#{tree.order}Q#{tree.order + 1}"
182
+ "CSSQQQQ#{tree.order}Q#{tree.order + 1}"
164
183
  end
165
184
 
166
185
  # @return [Integer] The number of bytes needed to store a node.
@@ -169,6 +188,8 @@ module PEROBS
169
188
  2 + # actual key count
170
189
  2 + # actual value or children count (aka data count)
171
190
  8 + # parent address
191
+ 8 + # previous sibling address
192
+ 8 + # next sibling address
172
193
  8 * order + # keys
173
194
  8 * (order + 1) + # values or child addresses
174
195
  4 # CRC32 checksum
@@ -200,8 +221,7 @@ module PEROBS
200
221
 
201
222
  # Once we have reached a leaf node we can insert or replace the value.
202
223
  if node.is_leaf
203
- node.insert_element(key, value)
204
- return
224
+ return node.insert_element(key, value)
205
225
  else
206
226
  # Descend into the right child node to add the value to.
207
227
  node = node.children[node.search_key_index(key)]
@@ -269,18 +289,19 @@ module PEROBS
269
289
  def split_node
270
290
  unless @parent
271
291
  # The node is the root node. We need to create a parent node first.
272
- self.parent = BTreeNode::create(@tree, nil, false)
292
+ self.parent = link(BTreeNode::create(@tree, nil, false))
273
293
  @parent.set_child(0, self)
274
294
  @tree.set_root(@parent)
275
295
  end
276
296
 
277
297
  # Create the new sibling that will take the 2nd half of the
278
298
  # node content.
279
- sibling = BTreeNode::create(@tree, @parent, @is_leaf)
299
+ sibling = BTreeNode::create(@tree, @parent, @is_leaf, link(self),
300
+ @next_sibling)
280
301
  # Determine the index of the middle element that gets moved to the
281
302
  # parent. The order must be an uneven number, so the integer division
282
303
  # by 2 yields the zero-based index of the middle element.
283
- mid = @tree.order / 2 + 1
304
+ mid = @tree.order / 2
284
305
  # Insert the middle element key into the parent node
285
306
  @parent.insert_element(@keys[mid], sibling)
286
307
  copy_elements(mid + (@is_leaf ? 0 : 1), sibling)
@@ -297,6 +318,9 @@ module PEROBS
297
318
  insert_element(@parent.keys[parent_index], upper_sibling.children[0])
298
319
  end
299
320
  upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size)
321
+ if (@next_sibling = link(upper_sibling.next_sibling))
322
+ @next_sibling.prev_sibling = link(self)
323
+ end
300
324
  @tree.delete_node(upper_sibling.node_address)
301
325
 
302
326
  @parent.remove_element(parent_index)
@@ -307,6 +331,7 @@ module PEROBS
307
331
  # @param key [Integer] key to address the value or child
308
332
  # @param value_or_child [Integer or BTreeNode] value or BTreeNode
309
333
  # reference
334
+ # @return true for insert, false for overwrite
310
335
  def insert_element(key, value_or_child)
311
336
  if @keys.size >= @tree.order
312
337
  PEROBS.log.fatal "Cannot insert into a full BTreeNode"
@@ -319,51 +344,48 @@ module PEROBS
319
344
  if is_leaf
320
345
  @values[i] = value_or_child
321
346
  else
322
- @children[i + 1] = BTreeNodeLink.new(@tree, value_or_child)
347
+ @children[i + 1] = link(value_or_child)
323
348
  end
349
+ @tree.node_cache.insert(self)
350
+
351
+ return false
324
352
  else
325
353
  # Create a new entry
326
354
  @keys.insert(i, key)
327
355
  if is_leaf
328
356
  @values.insert(i, value_or_child)
329
357
  else
330
- @children.insert(i + 1, BTreeNodeLink.new(@tree, value_or_child))
358
+ @children.insert(i + 1, link(value_or_child))
331
359
  end
360
+ @tree.node_cache.insert(self)
361
+
362
+ return true
332
363
  end
333
- @tree.node_cache.insert(self)
334
364
  end
335
365
 
336
366
  # Remove the element at the given index.
337
367
  def remove_element(index)
338
- # We need this key to find the link in the parent node.
339
- first_key = @keys[0]
340
- removed_value = nil
341
-
342
368
  # Delete the key at the specified index.
343
- unless @keys.delete_at(index)
344
- PEROBS.log.fatal "Could not remove element #{index} from BTreeNode " +
369
+ unless (key = @keys.delete_at(index))
370
+ PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " +
345
371
  "@#{@node_address}"
346
372
  end
347
- if @is_leaf
348
- # For leaf nodes, also delete the corresponding value.
349
- removed_value = @values.delete_at(index)
350
- else
351
- # The corresponding child has can be found at 1 index higher.
352
- @children.delete_at(index + 1)
353
- end
354
- @tree.node_cache.insert(self)
373
+ update_branch_key(key) if index == 0
355
374
 
356
- # Find the lower and upper siblings and the index of the key for this
357
- # node in the parent node.
358
- lower_sibling, upper_sibling, parent_index =
359
- find_closest_siblings(first_key)
375
+ # Delete the corresponding value.
376
+ removed_value = @values.delete_at(index)
377
+ @tree.node_cache.insert(self)
360
378
 
361
- if lower_sibling &&
362
- lower_sibling.keys.size + @keys.size < @tree.order
363
- lower_sibling.merge_node(self, parent_index - 1)
364
- elsif upper_sibling &&
365
- @keys.size + upper_sibling.keys.size < @tree.order
366
- merge_node(upper_sibling, parent_index)
379
+ if @keys.length < min_keys
380
+ if @prev_sibling && @prev_sibling.parent == @parent
381
+ borrow_from_previous_sibling(@prev_sibling) ||
382
+ @prev_sibling.merge_with_leaf_node(self)
383
+ elsif @next_sibling && @next_sibling.parent == @parent
384
+ borrow_from_next_sibling(@next_sibling) ||
385
+ merge_with_leaf_node(@next_sibling)
386
+ elsif @parent
387
+ PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
388
+ end
367
389
  end
368
390
 
369
391
  # The merge has potentially invalidated this node. After this method has
@@ -371,7 +393,88 @@ module PEROBS
371
393
  removed_value
372
394
  end
373
395
 
396
+ def remove_child(node)
397
+ unless (index = search_node_index(node))
398
+ PEROBS.log.fatal "Cannot remove child #{node.node_address} " +
399
+ "from node #{@node_address}"
400
+ end
401
+
402
+ @tree.node_cache.insert(self)
403
+ if index == 0
404
+ # Removing the first child is a bit more complicated as the
405
+ # corresponding branch key is in a parent node.
406
+ key = @keys.shift
407
+ update_branch_key(key)
408
+ else
409
+ # For all other children we can just remove the corresponding key.
410
+ @keys.delete_at(index - 1)
411
+ end
412
+
413
+ # Remove the child node link.
414
+ child = @children.delete_at(index)
415
+ # Unlink the neighbouring siblings from the child
416
+ child.prev_sibling.next_sibling = child.next_sibling if child.prev_sibling
417
+ child.next_sibling.prev_sibling = child.prev_sibling if child.next_sibling
418
+
419
+ if @keys.length < min_keys
420
+ # The node has become too small. Try borrowing a node from an adjacent
421
+ # sibling or merge with an adjacent node.
422
+ if @prev_sibling && @prev_sibling.parent == @parent
423
+ borrow_from_previous_sibling(@prev_sibling) ||
424
+ @prev_sibling.merge_with_branch_node(self)
425
+ elsif @next_sibling && @next_sibling.parent == @parent
426
+ borrow_from_next_sibling(@next_sibling) ||
427
+ merge_with_branch_node(@next_sibling)
428
+ end
429
+ end
430
+
431
+ if @parent.nil? && @children.length == 1
432
+ # If the node just below the root only has one child it will become
433
+ # the new root node.
434
+ new_root = @children.first
435
+ new_root.parent = nil
436
+ @tree.set_root(new_root)
437
+ end
438
+ end
439
+
440
+ def merge_with_leaf_node(node)
441
+ if @keys.length + node.keys.length > @tree.order
442
+ PEROBS.log.fatal "Leaf nodes are too big to merge"
443
+ end
444
+
445
+ @keys += node.keys
446
+ @values += node.values
447
+ @tree.node_cache.insert(self)
448
+
449
+ node.parent.remove_child(node)
450
+ end
451
+
452
+ def merge_with_branch_node(node)
453
+ if @keys.length + 1 + node.keys.length > @tree.order
454
+ PEROBS.log.fatal "Branch nodes are too big to merge"
455
+ end
456
+
457
+ index = @parent.search_node_index(node) - 1
458
+ @keys << @parent.keys[index]
459
+ @keys += node.keys
460
+ node.children.each { |c| c.parent = link(self) }
461
+ @children += node.children
462
+ @tree.node_cache.insert(self)
463
+
464
+ node.parent.remove_child(node)
465
+ end
466
+
467
+ def search_node_index(node)
468
+ index = search_key_index(node.keys.first)
469
+ unless @children[index] == node
470
+ raise RuntimeError, "Child at index #{index} is not the requested node"
471
+ end
472
+
473
+ index
474
+ end
475
+
374
476
  def copy_elements(src_idx, dest_node, dst_idx = 0, count = nil)
477
+ dest_node = dest_node.get_node
375
478
  unless count
376
479
  count = @tree.order - src_idx
377
480
  end
@@ -399,18 +502,47 @@ module PEROBS
399
502
  end
400
503
 
401
504
  def parent=(p)
402
- @parent = p ? BTreeNodeLink.new(@tree, p) : nil
505
+ @parent = p
506
+ @tree.node_cache.insert(self)
507
+
508
+ p
509
+ end
510
+
511
+ def prev_sibling=(node)
512
+ @prev_sibling = node
513
+ if node.nil? && @is_leaf
514
+ # If this node is a leaf node without a previous sibling we need to
515
+ # register it as the first leaf node.
516
+ @tree.set_first_leaf(BTreeNodeLink.new(@tree, self))
517
+ end
518
+
403
519
  @tree.node_cache.insert(self)
520
+
521
+ node
522
+ end
523
+
524
+ def next_sibling=(node)
525
+ @next_sibling = node
526
+ @tree.node_cache.insert(self)
527
+ if node.nil? && @is_leaf
528
+ # If this node is a leaf node without a next sibling we need to
529
+ # register it as the last leaf node.
530
+ @tree.set_last_leaf(BTreeNodeLink.new(@tree, self))
531
+ end
532
+
533
+ node
404
534
  end
405
535
 
406
536
  def set_child(index, child)
407
537
  if child
408
- @children[index] = BTreeNodeLink.new(@tree, child)
409
- @children[index].parent = self
538
+ @children[index] = link(child)
539
+ @children[index].parent = link(self)
410
540
  else
411
541
  @children[index] = nil
412
542
  end
413
543
  @tree.node_cache.insert(self)
544
+
545
+ child
414
546
  end
415
547
 
416
548
  def trim(idx)
@@ -511,12 +643,20 @@ module PEROBS
511
643
  # @yield [key, value]
512
644
  # @return [Boolean] true if tree has no errors
513
645
  def check
646
+ branch_depth = nil
647
+
514
648
  traverse do |node, position, stack|
515
649
  if position == 0
516
- if node.parent && node.keys.size < 1
517
- node.error "BTreeNode must have at least one entry"
518
- return false
650
+ if node.parent
651
+ # After a split the nodes will only have half the maximum keys.
652
+ # For branch nodes one of the split nodes will even have 1 key
653
+ # less as this will become the branch key in a parent node.
654
+ if node.keys.size < min_keys - (node.is_leaf ? 0 : 1)
655
+ node.error "BTreeNode #{node.node_address} has too few keys"
656
+ return false
657
+ end
519
658
  end
659
+
520
660
  if node.keys.size > @tree.order
521
661
  node.error "BTreeNode must not have more then #{@tree.order} " +
522
662
  "keys, but has #{node.keys.size} keys"
@@ -529,16 +669,43 @@ module PEROBS
529
669
  "#{node.keys.inspect}"
530
670
  return false
531
671
  end
672
+ last_key = key
532
673
  end
533
674
 
534
675
  if node.is_leaf
676
+ if branch_depth
677
+ unless branch_depth == stack.size
678
+ node.error "All leaf nodes must have same distance from root "
679
+ return false
680
+ end
681
+ else
682
+ branch_depth = stack.size
683
+ end
684
+ if node.prev_sibling.nil? && @tree.first_leaf != node
685
+ node.error "Leaf node #{node.node_address} has no previous " +
686
+ "sibling but is not the first leaf of the tree"
687
+ return false
688
+ end
689
+ if node.next_sibling.nil? && @tree.last_leaf != node
690
+ node.error "Leaf node #{node.node_address} has no next sibling " +
691
+ "but is not the last leaf of the tree"
692
+ return false
693
+ end
535
694
  unless node.keys.size == node.values.size
536
695
  node.error "Key count (#{node.keys.size}) and value " +
537
696
  "count (#{node.values.size}) don't match"
538
697
  return false
539
698
  end
699
+ unless node.children.empty?
700
+ node.error "@children must be nil for a leaf node"
701
+ return false
702
+ end
540
703
  else
541
- unless node.keys.size == node.children.size - 1
704
+ unless node.values.empty?
705
+ node.error "@values must be nil for a branch node"
706
+ return false
707
+ end
708
+ unless node.children.size == node.keys.size + 1
542
709
  node.error "Key count (#{node.keys.size}) must be one " +
543
710
  "less than children count (#{node.children.size})"
544
711
  return false
@@ -551,10 +718,10 @@ module PEROBS
551
718
  end
552
719
  unless child.parent.is_a?(BTreeNodeLink)
553
720
  node.error "Parent reference of child #{i} is of class " +
554
- "#{child.class} instead of BTreeNodeLink"
721
+ "#{child.parent.class} instead of BTreeNodeLink"
555
722
  return false
556
723
  end
557
- if child.node_address == node.node_address
724
+ if child == node
558
725
  node.error "Child #{i} points to self"
559
726
  return false
560
727
  end
@@ -567,6 +734,22 @@ module PEROBS
567
734
  "to this node"
568
735
  return false
569
736
  end
737
+ if i > 0
738
+ unless node.children[i - 1].next_sibling == child
739
+ node.error "next_sibling of node " +
740
+ "#{node.children[i - 1].node_address} " +
741
+ "must point to node #{child.node_address}"
742
+ return false
743
+ end
744
+ end
745
+ if i < node.children.length - 1
746
+ unless child == node.children[i + 1].prev_sibling
747
+ node.error "prev_sibling of node " +
748
+ "#{node.children[i + 1].node_address} " +
749
+ "must point to node #{child.node_address}"
750
+ return false
751
+ end
752
+ end
570
753
  end
571
754
  end
572
755
  elsif position <= node.keys.size
@@ -580,8 +763,7 @@ module PEROBS
580
763
  "Must be smaller than #{node.keys[index]}."
581
764
  return false
582
765
  end
583
- unless node.children[position].keys.first >=
584
- node.keys[index]
766
+ unless node.children[position].keys.first >= node.keys[index]
585
767
  node.error "Child #{node.children[position].node_address} " +
586
768
  "has too small key #{node.children[position].keys.first}. " +
587
769
  "Must be larger than or equal to #{node.keys[index]}."
@@ -671,13 +853,26 @@ module PEROBS
671
853
  s += ' ^@'
672
854
  end
673
855
  end
856
+ if @prev_sibling
857
+ begin
858
+ s += " <#{@prev_sibling.node_address}"
859
+ rescue
860
+ s += ' <@'
861
+ end
862
+ end
863
+ if @next_sibling
864
+ begin
865
+ s += " >#{@next_sibling.node_address}"
866
+ rescue
867
+ s += ' >@'
868
+ end
869
+ end
674
870
 
675
871
  s
676
872
  end
677
873
 
678
874
  def error(msg)
679
- PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}\n" +
680
- @tree.to_s
875
+ PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}"
681
876
  end
682
877
 
683
878
  def write_node
@@ -685,7 +880,9 @@ module PEROBS
685
880
  @is_leaf ? 1 : 0,
686
881
  @keys.size,
687
882
  @is_leaf ? @values.size : @children.size,
688
- @parent ? @parent.node_address : 0
883
+ @parent ? @parent.node_address : 0,
884
+ @prev_sibling ? @prev_sibling.node_address : 0,
885
+ @next_sibling ? @next_sibling.node_address : 0
689
886
  ] + @keys + ::Array.new(@tree.order - @keys.size, 0)
690
887
 
691
888
  if @is_leaf
@@ -708,24 +905,107 @@ module PEROBS
708
905
 
709
906
  private
710
907
 
711
- def find_closest_siblings(key)
712
- # The root node has no siblings.
713
- return [ nil, nil, nil ] unless @parent
908
+ def min_keys
909
+ @tree.order / 2
910
+ end
911
+
912
+ def link(node)
913
+ return nil if node.nil?
914
+
915
+ if node.is_a?(BTreeNodeLink)
916
+ return node
917
+ elsif node.is_a?(BTreeNode) || node.is_a?(Integer)
918
+ return BTreeNodeLink.new(@tree, node)
919
+ else
920
+ PEROBS.log.fatal "Node link must be a BTreeNode, not a #{node.class}"
921
+ end
922
+ end
923
+
924
+ # Try to borrow an element from the preceding sibling.
925
+ # @return [Boolean] true if an element was borrowed, false
926
+ # otherwise.
927
+ def borrow_from_previous_sibling(prev_node)
928
+ if prev_node.keys.length - 1 > min_keys
929
+ index = @parent.search_node_index(self) - 1
930
+
931
+ @tree.node_cache.insert(self)
932
+ @tree.node_cache.insert(prev_node.get_node)
933
+ @tree.node_cache.insert(@parent.get_node)
934
+ if @is_leaf
935
+ # Move the last key of the previous node to the front of this node
936
+ @keys.unshift(prev_node.keys.pop)
937
+ # Register the new lead key of this node with its parent
938
+ @parent.keys[index] = @keys.first
939
+ # Move the last value of the previous node to the front of this node
940
+ @values.unshift(prev_node.values.pop)
941
+ else
942
+ # For branch nodes the branch key will be the borrowed key.
943
+ @keys.unshift(@parent.keys[index])
944
+ # And the last key of the previous node will become the new branch
945
+ # key for this node.
946
+ @parent.keys[index] = prev_node.keys.pop
947
+ # Move the last child of the previous node to the front of this node
948
+ @children.unshift(node = prev_node.children.pop)
949
+ node.parent = link(self)
950
+ end
951
+
952
+ return true
953
+ end
954
+
955
+ false
956
+ end
957
+
958
+ # Try to borrow an element from the next sibling.
959
+ # @return [Boolean] true if an element was borrowed, false
960
+ # otherwise.
961
+ def borrow_from_next_sibling(next_node)
962
+ if next_node.keys.length - 1 > min_keys
963
+ # The next sibling now has a new lead key that requires the branch key
964
+ # to be updated in the parent node.
965
+ index = next_node.parent.search_node_index(next_node) - 1
966
+
967
+ @tree.node_cache.insert(self)
968
+ @tree.node_cache.insert(next_node.get_node)
969
+ @tree.node_cache.insert(next_node.parent.get_node)
970
+ if @is_leaf
971
+ # Move the first key of the next node to the end of this node
972
+ @keys << next_node.keys.shift
973
+ # Register the new lead key of next_node with its parent
974
+ next_node.parent.keys[index] = next_node.keys.first
975
+ # Move the first value of the next node to the end of this node
976
+ @values << next_node.values.shift
977
+ else
978
+ # For branch nodes we need to get the lead key from the parent of
979
+ # next_node.
980
+ @keys << next_node.parent.keys[index]
981
+ # The old lead key of next_node becomes the branch key in the parent
982
+ # of next_node. And the keys of next_node are shifted.
983
+ next_node.parent.keys[index] = next_node.keys.shift
984
+ # Move the first child of the next node to the end of this node
985
+ @children << (node = next_node.children.shift)
986
+ node.parent = link(self)
987
+ end
988
+
989
+ return true
990
+ end
991
+
992
+ false
993
+ end
994
+
995
+ def update_branch_key(old_key)
996
+ new_key = @keys.first
997
+ return unless (node = @parent)
714
998
 
715
- parent_index = @parent.search_key_index(key)
716
- unless @parent.children[parent_index] == self
717
- PEROBS.log.fatal "Failed to find self in parent"
999
+ while node
1000
+ if (index = node.keys.index(old_key))
1001
+ node.keys[index] = new_key
1002
+ @tree.node_cache.insert(node.get_node)
1003
+ return
1004
+ end
1005
+ node = node.parent
718
1006
  end
719
- # The child that corresponds to the key at parent_index has an index of
720
- # parent_index + 1! The lower_sibling has an child index of
721
- # parent_index and the upper sibling has a child index of parent_index +
722
- # 2.
723
- lower_sibling = parent_index < 1 ?
724
- nil : @parent.children[parent_index - 1]
725
- upper_sibling = parent_index >= (@parent.children.size - 1) ?
726
- nil : @parent.children[parent_index + 1]
727
1007
 
728
- [ lower_sibling, upper_sibling, parent_index ]
1008
+ # The smallest element has no branch key.
729
1009
  end
730
1010
 
731
1011
  end