perobs 4.1.0 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4da7810046e0984b00152ab2205ebb2e2d364b94
4
- data.tar.gz: 74c48af35b674713b222d0dff1b7f0c3ef070b45
2
+ SHA256:
3
+ metadata.gz: 7790ee42586bb2b8fca115f93ed277c4a8057f7a7027b356baea7b066da953e5
4
+ data.tar.gz: 110e0710a84ef544a4874cf868ec1662dfc900077d1894b24f53bcdaeaeeed34
5
5
  SHA512:
6
- metadata.gz: 5746476c388361dee6ac06094e7545e86ecb0569368c089e21429e4e6c3dbf92d8db5e7854249d7214c162f15a629306092dd0ca8bfbceb85eda7c08df95cf90
7
- data.tar.gz: a495a6127f26bdc7326f1bd7ea3671ff9add499d0b455a9334c884c23a2960ee963eedb8d2bcb5c3f257af9dde4895bebd458a96e7c079dbd0752028e9200614
6
+ metadata.gz: d95d845c7e8bd183f53b60369415bde86cd766c224bcbc2c52c870c60542a786f359a63eecc4e8829055cee6a1674bc952277749da183432b4b3abae7536efbb
7
+ data.tar.gz: 5fa1712fb01118d955d86396aec87b319c75856f6602ccf1a19f475a3dc64dc65a540e35271e6f2c6c7ee6a10c5cb03da30d43945a0dc264c7b0e48f575269d1
@@ -53,8 +53,8 @@ module PEROBS
53
53
  @name = name
54
54
  @progressmeter = progressmeter
55
55
 
56
- unless order > 2
57
- PEROBS.log.fatal "BTree order must be larger than 2, not #{order}"
56
+ unless order > 4
57
+ PEROBS.log.fatal "BTree order must be larger than 4, not #{order}"
58
58
  end
59
59
  unless order % 2 == 1
60
60
  PEROBS.log.fatal "BTree order must be an uneven number, not #{order}"
@@ -70,7 +70,7 @@ module PEROBS
70
70
  @nodes.register_custom_data('first_leaf')
71
71
  @nodes.register_custom_data('last_leaf')
72
72
  @nodes.register_custom_data('btree_size')
73
- @node_cache = PersistentObjectCache.new(16384, 5000, BTreeNode, self)
73
+ @node_cache = PersistentObjectCache.new(2**16, -1, BTreeNode, self)
74
74
  @root = @first_leaf = @last_leaf = nil
75
75
  @size = 0
76
76
 
@@ -117,12 +117,10 @@ module PEROBS
117
117
 
118
118
  # Close the tree file.
119
119
  def close
120
-
121
- def val_perc(value, total)
122
- "#{value} (#{(value.to_f / total*100.0).to_i}%)"
123
- end
124
-
125
120
  sync
121
+ PEROBS.log.info "BTree file #{@name} has currently " +
122
+ "#{@nodes.total_entries} used entries and #{@nodes.total_spaces} " +
123
+ "unused entries"
126
124
  @nodes.close
127
125
  @root = nil
128
126
  end
@@ -165,21 +163,34 @@ module PEROBS
165
163
  return false unless @nodes.check
166
164
 
167
165
  entries = 0
168
- res = true
166
+ stats = nil
169
167
  @progressmeter.start('Checking index structure', @size) do |pm|
170
- res = @root.check do |k, v|
168
+ stats = @root.check do |k, v|
171
169
  pm.update(entries += 1)
172
170
  block_given? ? yield(k, v) : true
173
171
  end
174
172
  end
175
173
 
174
+ return false unless stats
175
+
176
176
  unless entries == @size
177
177
  PEROBS.log.error "The BTree size (#{@size}) and the number of " +
178
178
  "found entries (#{entries}) don't match"
179
179
  return false
180
180
  end
181
+ unless stats.nodes_count == @nodes.total_entries
182
+ PEROBS.log.error "The BTree nodes count (#{stats.nodes_count}) and " +
183
+ "the number of entries in the nodes file (#{@nodes.total_entries}) " +
184
+ "don't match"
185
+ return false
186
+ end
187
+ PEROBS.log.info "Statistics for the BTree #{@name}: " +
188
+ "Number of nodes: #{stats.nodes_count}; " +
189
+ "Branch depth: #{stats.branch_depth}; " +
190
+ "Number of leave nodes: #{stats.leave_nodes}; " +
191
+ "Number of leaves: #{stats.leaves}"
181
192
 
182
- res
193
+ !stats.nil?
183
194
  end
184
195
 
185
196
  # Register a new node as root node of the tree.
@@ -208,8 +219,10 @@ module PEROBS
208
219
  # @param key [Integer] Unique key
209
220
  # @param value [Integer] value
210
221
  def insert(key, value)
211
- @size += 1 if @root.insert(key, value)
212
- @node_cache.flush
222
+ if @root.insert(key, value)
223
+ @size += 1
224
+ @node_cache.flush
225
+ end
213
226
  end
214
227
 
215
228
  # Retrieve the value associated with the given key. If no entry was found,
@@ -220,6 +233,13 @@ module PEROBS
220
233
  @root.get(key)
221
234
  end
222
235
 
236
+ # Either return the key/value pair that exactly matches the key or a
237
+ # key/value pair that has a key that is at least min_miss_increment larger
238
+ # than the key.
239
+ def get_best_match(key, min_miss_increment)
240
+ @root.get_best_match(key, min_miss_increment)
241
+ end
242
+
223
243
  # Find and remove the value associated with the given key. If no entry was
224
244
  # found, return nil, otherwise the found value.
225
245
  def remove(key)
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = BTreeBlob.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -144,11 +144,12 @@ module PEROBS
144
144
 
145
145
  # Remove all entries from the index that have not been marked.
146
146
  # @return [Array] List of deleted object IDs.
147
- def delete_unmarked_entries
147
+ def delete_unmarked_entries(&block)
148
148
  deleted_ids = []
149
149
  # First remove the entry from the hash table.
150
150
  @entries_by_id.delete_if do |id, e|
151
151
  if e[MARKED] == 0
152
+ yield(id) if block_given?
152
153
  deleted_ids << id
153
154
  true
154
155
  else
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = BTreeDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -159,9 +160,9 @@ module PEROBS
159
160
  # Permanently delete all objects that have not been marked. Those are
160
161
  # orphaned and are no longer referenced by any actively used object.
161
162
  # @return [Array] List of IDs that have been removed from the DB.
162
- def delete_unmarked_objects
163
+ def delete_unmarked_objects(&block)
163
164
  deleted_ids = []
164
- each_blob { |blob| deleted_ids += blob.delete_unmarked_entries }
165
+ each_blob { |blob| deleted_ids += blob.delete_unmarked_entries(&block) }
165
166
  deleted_ids
166
167
  end
167
168
 
@@ -39,6 +39,8 @@ module PEROBS
39
39
  # mark a node as leaf or branch node.
40
40
  class BTreeNode
41
41
 
42
+ Stats = Struct.new(:branch_depth, :nodes_count, :leave_nodes, :leaves)
43
+
42
44
  attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling,
43
45
  :keys, :values, :children
44
46
 
@@ -225,6 +227,7 @@ module PEROBS
225
227
  else
226
228
  # Descend into the right child node to add the value to.
227
229
  node = node.children[node.search_key_index(key)]
230
+ node = node.get_node if node
228
231
  end
229
232
  end
230
233
 
@@ -249,6 +252,62 @@ module PEROBS
249
252
 
250
253
  # Descend into the right child node to continue the search.
251
254
  node = node.children[i]
255
+ node = node.get_node if node
256
+ end
257
+
258
+ PEROBS.log.fatal "Could not find proper node to get from while " +
259
+ "looking for key #{key}"
260
+ end
261
+
262
+ # Return the key/value pair that matches the given key or the next larger
263
+ # key/value pair with a key that is at least as large as key +
264
+ # min_miss_increment.
265
+ # @param key [Integer] key to search for
266
+ # @param min_miss_increment [Integer] minimum required key increment in
267
+ # case an exact key match could not be found
268
+ # @return [Array or nil] matching [key, value] pair, or nil if none found
269
+ def get_best_match(key, min_miss_increment)
270
+ node = self
271
+
272
+ while node do
273
+ # Find index of the entry that best fits the key.
274
+ i = node.search_key_index(key)
275
+ if node.is_leaf
276
+ # This is a leaf node. Check if there is an exact match for the
277
+ # given key.
278
+ if node.keys[i] == key
279
+ # Return the corresponding key/value pair.
280
+ return [ key, node.values[i] ]
281
+ else
282
+ # No exact key match. Now search the larger keys for the first
283
+ # that is at least key + min_miss_increment large.
284
+ keys = node.keys
285
+ keys_length = keys.length
286
+ while node
287
+ if i >= keys_length
288
+ # We've reached the end of a node. Continue search in next
289
+ # sibling.
290
+ return nil unless (node = node.next_sibling)
291
+ node = node.get_node
292
+ keys = node.keys
293
+ keys_length = keys.length
294
+ i = -1
295
+ elsif keys[i] >= key + min_miss_increment
296
+ # We've found a key that fits the criteria. Return the
297
+ # corresponding key/value pair.
298
+ return [ keys[i], node.values[i] ]
299
+ end
300
+
301
+ i += 1
302
+ end
303
+
304
+ return nil
305
+ end
306
+ end
307
+
308
+ # Descend into the right child node to continue the search.
309
+ node = node.children[i]
310
+ node = node.get_node if node
252
311
  end
253
312
 
254
313
  PEROBS.log.fatal "Could not find proper node to get from while " +
@@ -277,6 +336,7 @@ module PEROBS
277
336
 
278
337
  # Descend into the right child node to continue the search.
279
338
  node = node.children[i]
339
+ node = node.get_node if node
280
340
  end
281
341
 
282
342
  PEROBS.log.fatal 'Could not find proper node to remove from'
@@ -310,22 +370,6 @@ module PEROBS
310
370
  @parent
311
371
  end
312
372
 
313
- def merge_node(upper_sibling, parent_index)
314
- if upper_sibling == self
315
- PEROBS.log.fatal "Cannot merge node @#{@node_address} with self"
316
- end
317
- unless upper_sibling.is_leaf
318
- insert_element(@parent.keys[parent_index], upper_sibling.children[0])
319
- end
320
- upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size)
321
- if (@next_sibling = link(upper_sibling.next_sibling))
322
- @next_sibling.prev_sibling = link(self)
323
- end
324
- @tree.delete_node(upper_sibling.node_address)
325
-
326
- @parent.remove_element(parent_index)
327
- end
328
-
329
373
  # Insert the given value or child into the current node using the key as
330
374
  # index.
331
375
  # @param key [Integer] key to address the value or child
@@ -428,13 +472,8 @@ module PEROBS
428
472
  end
429
473
  end
430
474
 
431
- if @parent.nil? && @children.length == 1
432
- # If the node just below the root only has one child it will become
433
- # the new root node.
434
- new_root = @children.first
435
- new_root.parent = nil
436
- @tree.set_root(new_root)
437
- end
475
+ # Delete the node from the cache and backing store.
476
+ @tree.delete_node(node.node_address)
438
477
  end
439
478
 
440
479
  def merge_with_leaf_node(node)
@@ -561,36 +600,8 @@ module PEROBS
561
600
  # @param key [Integer] key to search for
562
601
  # @return [Integer] Index of the matching key or the insert position.
563
602
  def search_key_index(key)
564
- # Handle special case for empty keys list.
565
- return 0 if @keys.empty?
566
-
567
- # Keys are unique and always sorted. Use a binary search to find the
568
- # index that fits the given key.
569
- li = pi = 0
570
- ui = @keys.size - 1
571
- while li <= ui
572
- # The pivot element is always in the middle between the lower and upper
573
- # index.
574
- pi = li + (ui - li) / 2
575
-
576
- if key < @keys[pi]
577
- # The pivot element is smaller than the key. Set the upper index to
578
- # the pivot index.
579
- ui = pi - 1
580
- elsif key > @keys[pi]
581
- # The pivot element is larger than the key. Set the lower index to
582
- # the pivot index.
583
- li = pi + 1
584
- else
585
- # We've found an exact match. For leaf nodes return the found index.
586
- # For branch nodes we have to add one to the index since the larger
587
- # child is the right one.
588
- return @is_leaf ? pi : pi + 1
589
- end
590
- end
591
- # No exact match was found. For the insert operaton we need to return
592
- # the index of the first key that is larger than the given key.
593
- @keys[pi] < key ? pi + 1 : pi
603
+ (@is_leaf ? @keys.bsearch_index { |x| x >= key } :
604
+ @keys.bsearch_index { |x| x > key }) || @keys.length
594
605
  end
595
606
 
596
607
  # Iterate over all the key/value pairs in this node and all sub-nodes.
@@ -641,25 +652,28 @@ module PEROBS
641
652
  # Check consistency of the node and all subsequent nodes. In case an error
642
653
  # is found, a message is logged and false is returned.
643
654
  # @yield [key, value]
644
- # @return [Boolean] true if tree has no errors
655
+ # @return [nil or Stats] nil in case of errors or a Stats struct with some
656
+ # statistical information about the tree
645
657
  def check
646
- branch_depth = nil
658
+ stats = Stats.new(nil, 0, 0, 0)
647
659
 
648
660
  traverse do |node, position, stack|
649
661
  if position == 0
662
+ stats.nodes_count += 1
650
663
  if node.parent
651
664
  # After a split the nodes will only have half the maximum keys.
652
665
  # For branch nodes one of the split nodes will have even 1 key
653
666
  # less as this will become the branch key in a parent node.
654
667
  if node.keys.size < min_keys - (node.is_leaf ? 0 : 1)
655
668
  node.error "BTreeNode #{node.node_address} has too few keys"
656
- return false
669
+ return nil
657
670
  end
658
671
  end
659
672
 
660
673
  if node.keys.size > @tree.order
661
674
  node.error "BTreeNode must not have more then #{@tree.order} " +
662
675
  "keys, but has #{node.keys.size} keys"
676
+ return nil
663
677
  end
664
678
 
665
679
  last_key = nil
@@ -667,79 +681,82 @@ module PEROBS
667
681
  if last_key && key < last_key
668
682
  node.error "Keys are not increasing monotoneously: " +
669
683
  "#{node.keys.inspect}"
670
- return false
684
+ return nil
671
685
  end
672
686
  last_key = key
673
687
  end
674
688
 
675
689
  if node.is_leaf
676
- if branch_depth
677
- unless branch_depth == stack.size
690
+ if stats.branch_depth
691
+ unless stats.branch_depth == node.tree_level
678
692
  node.error "All leaf nodes must have same distance from root "
679
- return false
693
+ return nil
680
694
  end
681
695
  else
682
- branch_depth = stack.size
696
+ stats.branch_depth = node.tree_level
683
697
  end
684
698
  if node.prev_sibling.nil? && @tree.first_leaf != node
685
699
  node.error "Leaf node #{node.node_address} has no previous " +
686
700
  "sibling but is not the first leaf of the tree"
687
- return false
701
+ return nil
688
702
  end
689
703
  if node.next_sibling.nil? && @tree.last_leaf != node
690
704
  node.error "Leaf node #{node.node_address} has no next sibling " +
691
705
  "but is not the last leaf of the tree"
692
- return false
706
+ return nil
693
707
  end
694
708
  unless node.keys.size == node.values.size
695
709
  node.error "Key count (#{node.keys.size}) and value " +
696
710
  "count (#{node.values.size}) don't match"
697
- return false
711
+ return nil
698
712
  end
699
713
  unless node.children.empty?
700
714
  node.error "@children must be nil for a leaf node"
701
- return false
715
+ return nil
702
716
  end
717
+
718
+ stats.leave_nodes += 1
719
+ stats.leaves += node.keys.length
703
720
  else
704
721
  unless node.values.empty?
705
722
  node.error "@values must be nil for a branch node"
706
- return false
723
+ return nil
707
724
  end
708
725
  unless node.children.size == node.keys.size + 1
709
726
  node.error "Key count (#{node.keys.size}) must be one " +
710
727
  "less than children count (#{node.children.size})"
711
- return false
728
+ return nil
712
729
  end
713
730
  node.children.each_with_index do |child, i|
714
731
  unless child.is_a?(BTreeNodeLink)
715
732
  node.error "Child #{i} is of class #{child.class} " +
716
733
  "instead of BTreeNodeLink"
717
- return false
734
+ return nil
718
735
  end
719
736
  unless child.parent.is_a?(BTreeNodeLink)
720
737
  node.error "Parent reference of child #{i} is of class " +
721
738
  "#{child.parent.class} instead of BTreeNodeLink"
722
- return false
739
+ return nil
723
740
  end
724
741
  if child == node
725
742
  node.error "Child #{i} points to self"
726
- return false
743
+ return nil
727
744
  end
728
745
  if stack.include?(child)
729
746
  node.error "Child #{i} points to ancester node"
730
- return false
747
+ return nil
731
748
  end
732
749
  unless child.parent == node
733
750
  node.error "Child #{i} does not have parent pointing " +
734
751
  "to this node"
735
- return false
752
+ return nil
736
753
  end
737
754
  if i > 0
738
755
  unless node.children[i - 1].next_sibling == child
739
756
  node.error "next_sibling of node " +
740
757
  "#{node.children[i - 1].node_address} " +
741
758
  "must point to node #{child.node_address}"
742
- return false
759
+ return nil
743
760
  end
744
761
  end
745
762
  if i < node.children.length - 1
@@ -747,7 +764,7 @@ module PEROBS
747
764
  node.error "prev_sibling of node " +
748
765
  "#{node.children[i + 1].node_address} " +
749
766
  "must point to node #{child.node_address}"
750
- return false
767
+ return nil
751
768
  end
752
769
  end
753
770
  end
@@ -761,24 +778,24 @@ module PEROBS
761
778
  node.error "Child #{node.children[index].node_address} " +
762
779
  "has too large key #{node.children[index].keys.last}. " +
763
780
  "Must be smaller than #{node.keys[index]}."
764
- return false
781
+ return nil
765
782
  end
766
783
  unless node.children[position].keys.first >= node.keys[index]
767
784
  node.error "Child #{node.children[position].node_address} " +
768
785
  "has too small key #{node.children[position].keys.first}. " +
769
786
  "Must be larger than or equal to #{node.keys[index]}."
770
- return false
787
+ return nil
771
788
  end
772
789
  else
773
790
  if block_given?
774
791
  # If a block was given, call this block with the key and value.
775
- return false unless yield(node.keys[index], node.values[index])
792
+ return nil unless yield(node.keys[index], node.values[index])
776
793
  end
777
794
  end
778
795
  end
779
796
  end
780
797
 
781
- true
798
+ stats
782
799
  end
783
800
 
784
801
  def is_top?
@@ -834,6 +851,7 @@ module PEROBS
834
851
 
835
852
  str = (is_last_child ? ' ' : ' |') + str
836
853
  node = node.parent
854
+ node = node.get_node if node
837
855
  end
838
856
 
839
857
  str
@@ -871,6 +889,17 @@ module PEROBS
871
889
  s
872
890
  end
873
891
 
892
+ def tree_level
893
+ level = 1
894
+ node = self
895
+ while (node = node.parent)
896
+ level += 1
897
+ end
898
+
899
+ level
900
+ end
901
+
902
+
874
903
  def error(msg)
875
904
  PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}"
876
905
  end