perobs 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4da7810046e0984b00152ab2205ebb2e2d364b94
4
- data.tar.gz: 74c48af35b674713b222d0dff1b7f0c3ef070b45
2
+ SHA256:
3
+ metadata.gz: 7790ee42586bb2b8fca115f93ed277c4a8057f7a7027b356baea7b066da953e5
4
+ data.tar.gz: 110e0710a84ef544a4874cf868ec1662dfc900077d1894b24f53bcdaeaeeed34
5
5
  SHA512:
6
- metadata.gz: 5746476c388361dee6ac06094e7545e86ecb0569368c089e21429e4e6c3dbf92d8db5e7854249d7214c162f15a629306092dd0ca8bfbceb85eda7c08df95cf90
7
- data.tar.gz: a495a6127f26bdc7326f1bd7ea3671ff9add499d0b455a9334c884c23a2960ee963eedb8d2bcb5c3f257af9dde4895bebd458a96e7c079dbd0752028e9200614
6
+ metadata.gz: d95d845c7e8bd183f53b60369415bde86cd766c224bcbc2c52c870c60542a786f359a63eecc4e8829055cee6a1674bc952277749da183432b4b3abae7536efbb
7
+ data.tar.gz: 5fa1712fb01118d955d86396aec87b319c75856f6602ccf1a19f475a3dc64dc65a540e35271e6f2c6c7ee6a10c5cb03da30d43945a0dc264c7b0e48f575269d1
@@ -53,8 +53,8 @@ module PEROBS
53
53
  @name = name
54
54
  @progressmeter = progressmeter
55
55
 
56
- unless order > 2
57
- PEROBS.log.fatal "BTree order must be larger than 2, not #{order}"
56
+ unless order > 4
57
+ PEROBS.log.fatal "BTree order must be larger than 4, not #{order}"
58
58
  end
59
59
  unless order % 2 == 1
60
60
  PEROBS.log.fatal "BTree order must be an uneven number, not #{order}"
@@ -70,7 +70,7 @@ module PEROBS
70
70
  @nodes.register_custom_data('first_leaf')
71
71
  @nodes.register_custom_data('last_leaf')
72
72
  @nodes.register_custom_data('btree_size')
73
- @node_cache = PersistentObjectCache.new(16384, 5000, BTreeNode, self)
73
+ @node_cache = PersistentObjectCache.new(2**16, -1, BTreeNode, self)
74
74
  @root = @first_leaf = @last_leaf = nil
75
75
  @size = 0
76
76
 
@@ -117,12 +117,10 @@ module PEROBS
117
117
 
118
118
  # Close the tree file.
119
119
  def close
120
-
121
- def val_perc(value, total)
122
- "#{value} (#{(value.to_f / total*100.0).to_i}%)"
123
- end
124
-
125
120
  sync
121
+ PEROBS.log.info "BTree file #{@name} has currently " +
122
+ "#{@nodes.total_entries} used entries and #{@nodes.total_spaces} " +
123
+ "unused entries"
126
124
  @nodes.close
127
125
  @root = nil
128
126
  end
@@ -165,21 +163,34 @@ module PEROBS
165
163
  return false unless @nodes.check
166
164
 
167
165
  entries = 0
168
- res = true
166
+ stats = nil
169
167
  @progressmeter.start('Checking index structure', @size) do |pm|
170
- res = @root.check do |k, v|
168
+ stats = @root.check do |k, v|
171
169
  pm.update(entries += 1)
172
170
  block_given? ? yield(k, v) : true
173
171
  end
174
172
  end
175
173
 
174
+ return false unless stats
175
+
176
176
  unless entries == @size
177
177
  PEROBS.log.error "The BTree size (#{@size}) and the number of " +
178
178
  "found entries (#{entries}) don't match"
179
179
  return false
180
180
  end
181
+ unless stats.nodes_count == @nodes.total_entries
182
+ PEROBS.log.error "The BTree nodes count (#{stats.nodes_count}) and " +
183
+ "the number of entries in the nodes file (#{@nodes.total_entries}) " +
184
+ "don't match"
185
+ return false
186
+ end
187
+ PEROBS.log.info "Statistics for the BTree #{@name}: " +
188
+ "Number of nodes: #{stats.nodes_count}; " +
189
+ "Branch depth: #{stats.branch_depth}; " +
190
+ "Number of leave nodes: #{stats.leave_nodes}; " +
191
+ "Number of leaves: #{stats.leaves}"
181
192
 
182
- res
193
+ !stats.nil?
183
194
  end
184
195
 
185
196
  # Register a new node as root node of the tree.
@@ -208,8 +219,10 @@ module PEROBS
208
219
  # @param key [Integer] Unique key
209
220
  # @param value [Integer] value
210
221
  def insert(key, value)
211
- @size += 1 if @root.insert(key, value)
212
- @node_cache.flush
222
+ if @root.insert(key, value)
223
+ @size += 1
224
+ @node_cache.flush
225
+ end
213
226
  end
214
227
 
215
228
  # Retrieve the value associated with the given key. If no entry was found,
@@ -220,6 +233,13 @@ module PEROBS
220
233
  @root.get(key)
221
234
  end
222
235
 
236
+ # Either return the key/value pair that exactly matches the key or a
237
+ # key/value pair that has a key that is at least min_miss_increment larger
238
+ # than the key.
239
+ def get_best_match(key, min_miss_increment)
240
+ @root.get_best_match(key, min_miss_increment)
241
+ end
242
+
223
243
  # Find and remove the value associated with the given key. If no entry was
224
244
  # found, return nil, otherwise the found value.
225
245
  def remove(key)
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = BTreeBlob.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -144,11 +144,12 @@ module PEROBS
144
144
 
145
145
  # Remove all entries from the index that have not been marked.
146
146
  # @return [Array] List of deleted object IDs.
147
- def delete_unmarked_entries
147
+ def delete_unmarked_entries(&block)
148
148
  deleted_ids = []
149
149
  # First remove the entry from the hash table.
150
150
  @entries_by_id.delete_if do |id, e|
151
151
  if e[MARKED] == 0
152
+ yield(id) if block_given?
152
153
  deleted_ids << id
153
154
  true
154
155
  else
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = BTreeDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -159,9 +160,9 @@ module PEROBS
159
160
  # Permanently delete all objects that have not been marked. Those are
160
161
  # orphaned and are no longer referenced by any actively used object.
161
162
  # @return [Array] List of IDs that have been removed from the DB.
162
- def delete_unmarked_objects
163
+ def delete_unmarked_objects(&block)
163
164
  deleted_ids = []
164
- each_blob { |blob| deleted_ids += blob.delete_unmarked_entries }
165
+ each_blob { |blob| deleted_ids += blob.delete_unmarked_entries(&block) }
165
166
  deleted_ids
166
167
  end
167
168
 
@@ -39,6 +39,8 @@ module PEROBS
39
39
  # mark a node as leaf or branch node.
40
40
  class BTreeNode
41
41
 
42
+ Stats = Struct.new(:branch_depth, :nodes_count, :leave_nodes, :leaves)
43
+
42
44
  attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling,
43
45
  :keys, :values, :children
44
46
 
@@ -225,6 +227,7 @@ module PEROBS
225
227
  else
226
228
  # Descend into the right child node to add the value to.
227
229
  node = node.children[node.search_key_index(key)]
230
+ node = node.get_node if node
228
231
  end
229
232
  end
230
233
 
@@ -249,6 +252,62 @@ module PEROBS
249
252
 
250
253
  # Descend into the right child node to continue the search.
251
254
  node = node.children[i]
255
+ node = node.get_node if node
256
+ end
257
+
258
+ PEROBS.log.fatal "Could not find proper node to get from while " +
259
+ "looking for key #{key}"
260
+ end
261
+
262
+ # Return the key/value pair that matches the given key or the next larger
263
+ # key/value pair with a key that is at least as large as key +
264
+ # min_miss_increment.
265
+ # @param key [Integer] key to search for
266
+ # @param min_miss_increment [Integer] minimum required key increment in
267
+ # case an exact key match could not be found
268
+ # @return [Array or nil] matching [key, value] pair, or nil if none found
269
+ def get_best_match(key, min_miss_increment)
270
+ node = self
271
+
272
+ while node do
273
+ # Find index of the entry that best fits the key.
274
+ i = node.search_key_index(key)
275
+ if node.is_leaf
276
+ # This is a leaf node. Check if there is an exact match for the
277
+ # given key.
278
+ if node.keys[i] == key
279
+ # Return the corresponding key/value pair.
280
+ return [ key, node.values[i] ]
281
+ else
282
+ # No exact key match. Now search the larger keys for the first
283
+ # that is at least key + min_miss_increment large.
284
+ keys = node.keys
285
+ keys_length = keys.length
286
+ while node
287
+ if i >= keys_length
288
+ # We've reached the end of a node. Continue search in next
289
+ # sibling.
290
+ return nil unless (node = node.next_sibling)
291
+ node = node.get_node
292
+ keys = node.keys
293
+ keys_length = keys.length
294
+ i = -1
295
+ elsif keys[i] >= key + min_miss_increment
296
+ # We've found a key that fits the criteria. Return the
297
+ # corresponding key/value pair.
298
+ return [ keys[i], node.values[i] ]
299
+ end
300
+
301
+ i += 1
302
+ end
303
+
304
+ return nil
305
+ end
306
+ end
307
+
308
+ # Descend into the right child node to continue the search.
309
+ node = node.children[i]
310
+ node = node.get_node if node
252
311
  end
253
312
 
254
313
  PEROBS.log.fatal "Could not find proper node to get from while " +
@@ -277,6 +336,7 @@ module PEROBS
277
336
 
278
337
  # Descend into the right child node to continue the search.
279
338
  node = node.children[i]
339
+ node = node.get_node if node
280
340
  end
281
341
 
282
342
  PEROBS.log.fatal 'Could not find proper node to remove from'
@@ -310,22 +370,6 @@ module PEROBS
310
370
  @parent
311
371
  end
312
372
 
313
- def merge_node(upper_sibling, parent_index)
314
- if upper_sibling == self
315
- PEROBS.log.fatal "Cannot merge node @#{@node_address} with self"
316
- end
317
- unless upper_sibling.is_leaf
318
- insert_element(@parent.keys[parent_index], upper_sibling.children[0])
319
- end
320
- upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size)
321
- if (@next_sibling = link(upper_sibling.next_sibling))
322
- @next_sibling.prev_sibling = link(self)
323
- end
324
- @tree.delete_node(upper_sibling.node_address)
325
-
326
- @parent.remove_element(parent_index)
327
- end
328
-
329
373
  # Insert the given value or child into the current node using the key as
330
374
  # index.
331
375
  # @param key [Integer] key to address the value or child
@@ -428,13 +472,8 @@ module PEROBS
428
472
  end
429
473
  end
430
474
 
431
- if @parent.nil? && @children.length == 1
432
- # If the node just below the root only has one child it will become
433
- # the new root node.
434
- new_root = @children.first
435
- new_root.parent = nil
436
- @tree.set_root(new_root)
437
- end
475
+ # Delete the node from the cache and backing store.
476
+ @tree.delete_node(node.node_address)
438
477
  end
439
478
 
440
479
  def merge_with_leaf_node(node)
@@ -561,36 +600,8 @@ module PEROBS
561
600
  # @param key [Integer] key to search for
562
601
  # @return [Integer] Index of the matching key or the insert position.
563
602
  def search_key_index(key)
564
- # Handle special case for empty keys list.
565
- return 0 if @keys.empty?
566
-
567
- # Keys are unique and always sorted. Use a binary search to find the
568
- # index that fits the given key.
569
- li = pi = 0
570
- ui = @keys.size - 1
571
- while li <= ui
572
- # The pivot element is always in the middle between the lower and upper
573
- # index.
574
- pi = li + (ui - li) / 2
575
-
576
- if key < @keys[pi]
577
- # The pivot element is smaller than the key. Set the upper index to
578
- # the pivot index.
579
- ui = pi - 1
580
- elsif key > @keys[pi]
581
- # The pivot element is larger than the key. Set the lower index to
582
- # the pivot index.
583
- li = pi + 1
584
- else
585
- # We've found an exact match. For leaf nodes return the found index.
586
- # For branch nodes we have to add one to the index since the larger
587
- # child is the right one.
588
- return @is_leaf ? pi : pi + 1
589
- end
590
- end
591
- # No exact match was found. For the insert operaton we need to return
592
- # the index of the first key that is larger than the given key.
593
- @keys[pi] < key ? pi + 1 : pi
603
+ (@is_leaf ? @keys.bsearch_index { |x| x >= key } :
604
+ @keys.bsearch_index { |x| x > key }) || @keys.length
594
605
  end
595
606
 
596
607
  # Iterate over all the key/value pairs in this node and all sub-nodes.
@@ -641,25 +652,28 @@ module PEROBS
641
652
  # Check consistency of the node and all subsequent nodes. In case an error
642
653
  # is found, a message is logged and false is returned.
643
654
  # @yield [key, value]
644
- # @return [Boolean] true if tree has no errors
655
+ # @return [nil or Stats] nil in case of errors or a Stats struct with some
656
+ # statistical information about the tree
645
657
  def check
646
- branch_depth = nil
658
+ stats = Stats.new(nil, 0, 0, 0)
647
659
 
648
660
  traverse do |node, position, stack|
649
661
  if position == 0
662
+ stats.nodes_count += 1
650
663
  if node.parent
651
664
  # After a split the nodes will only have half the maximum keys.
652
665
  # For branch nodes one of the split nodes will have even 1 key
653
666
  # less as this will become the branch key in a parent node.
654
667
  if node.keys.size < min_keys - (node.is_leaf ? 0 : 1)
655
668
  node.error "BTreeNode #{node.node_address} has too few keys"
656
- return false
669
+ return nil
657
670
  end
658
671
  end
659
672
 
660
673
  if node.keys.size > @tree.order
661
674
  node.error "BTreeNode must not have more then #{@tree.order} " +
662
675
  "keys, but has #{node.keys.size} keys"
676
+ return nil
663
677
  end
664
678
 
665
679
  last_key = nil
@@ -667,79 +681,82 @@ module PEROBS
667
681
  if last_key && key < last_key
668
682
  node.error "Keys are not increasing monotoneously: " +
669
683
  "#{node.keys.inspect}"
670
- return false
684
+ return nil
671
685
  end
672
686
  last_key = key
673
687
  end
674
688
 
675
689
  if node.is_leaf
676
- if branch_depth
677
- unless branch_depth == stack.size
690
+ if stats.branch_depth
691
+ unless stats.branch_depth == node.tree_level
678
692
  node.error "All leaf nodes must have same distance from root "
679
- return false
693
+ return nil
680
694
  end
681
695
  else
682
- branch_depth = stack.size
696
+ stats.branch_depth = node.tree_level
683
697
  end
684
698
  if node.prev_sibling.nil? && @tree.first_leaf != node
685
699
  node.error "Leaf node #{node.node_address} has no previous " +
686
700
  "sibling but is not the first leaf of the tree"
687
- return false
701
+ return nil
688
702
  end
689
703
  if node.next_sibling.nil? && @tree.last_leaf != node
690
704
  node.error "Leaf node #{node.node_address} has no next sibling " +
691
705
  "but is not the last leaf of the tree"
692
- return false
706
+ return nil
693
707
  end
694
708
  unless node.keys.size == node.values.size
695
709
  node.error "Key count (#{node.keys.size}) and value " +
696
710
  "count (#{node.values.size}) don't match"
697
- return false
711
+ return nil
698
712
  end
699
713
  unless node.children.empty?
700
714
  node.error "@children must be nil for a leaf node"
701
- return false
715
+ return nil
702
716
  end
717
+
718
+ stats.leave_nodes += 1
719
+ stats.leaves += node.keys.length
703
720
  else
704
721
  unless node.values.empty?
705
722
  node.error "@values must be nil for a branch node"
706
- return false
723
+ return nil
707
724
  end
708
725
  unless node.children.size == node.keys.size + 1
709
726
  node.error "Key count (#{node.keys.size}) must be one " +
710
727
  "less than children count (#{node.children.size})"
711
- return false
728
+ return nil
712
729
  end
713
730
  node.children.each_with_index do |child, i|
714
731
  unless child.is_a?(BTreeNodeLink)
715
732
  node.error "Child #{i} is of class #{child.class} " +
716
733
  "instead of BTreeNodeLink"
717
- return false
734
+ return nil
718
735
  end
719
736
  unless child.parent.is_a?(BTreeNodeLink)
720
737
  node.error "Parent reference of child #{i} is of class " +
721
738
  "#{child.parent.class} instead of BTreeNodeLink"
722
- return false
739
+ return nil
723
740
  end
724
741
  if child == node
725
742
  node.error "Child #{i} points to self"
726
- return false
743
+ return nil
727
744
  end
728
745
  if stack.include?(child)
729
746
  node.error "Child #{i} points to ancester node"
730
- return false
747
+ return nil
731
748
  end
732
749
  unless child.parent == node
733
750
  node.error "Child #{i} does not have parent pointing " +
734
751
  "to this node"
735
- return false
752
+ return nil
736
753
  end
737
754
  if i > 0
738
755
  unless node.children[i - 1].next_sibling == child
739
756
  node.error "next_sibling of node " +
740
757
  "#{node.children[i - 1].node_address} " +
741
758
  "must point to node #{child.node_address}"
742
- return false
759
+ return nil
743
760
  end
744
761
  end
745
762
  if i < node.children.length - 1
@@ -747,7 +764,7 @@ module PEROBS
747
764
  node.error "prev_sibling of node " +
748
765
  "#{node.children[i + 1].node_address} " +
749
766
  "must point to node #{child.node_address}"
750
- return false
767
+ return nil
751
768
  end
752
769
  end
753
770
  end
@@ -761,24 +778,24 @@ module PEROBS
761
778
  node.error "Child #{node.children[index].node_address} " +
762
779
  "has too large key #{node.children[index].keys.last}. " +
763
780
  "Must be smaller than #{node.keys[index]}."
764
- return false
781
+ return nil
765
782
  end
766
783
  unless node.children[position].keys.first >= node.keys[index]
767
784
  node.error "Child #{node.children[position].node_address} " +
768
785
  "has too small key #{node.children[position].keys.first}. " +
769
786
  "Must be larger than or equal to #{node.keys[index]}."
770
- return false
787
+ return nil
771
788
  end
772
789
  else
773
790
  if block_given?
774
791
  # If a block was given, call this block with the key and value.
775
- return false unless yield(node.keys[index], node.values[index])
792
+ return nil unless yield(node.keys[index], node.values[index])
776
793
  end
777
794
  end
778
795
  end
779
796
  end
780
797
 
781
- true
798
+ stats
782
799
  end
783
800
 
784
801
  def is_top?
@@ -834,6 +851,7 @@ module PEROBS
834
851
 
835
852
  str = (is_last_child ? ' ' : ' |') + str
836
853
  node = node.parent
854
+ node = node.get_node if node
837
855
  end
838
856
 
839
857
  str
@@ -871,6 +889,17 @@ module PEROBS
871
889
  s
872
890
  end
873
891
 
892
+ def tree_level
893
+ level = 1
894
+ node = self
895
+ while (node = node.parent)
896
+ level += 1
897
+ end
898
+
899
+ level
900
+ end
901
+
902
+
874
903
  def error(msg)
875
904
  PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}"
876
905
  end