perobs 4.2.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7790ee42586bb2b8fca115f93ed277c4a8057f7a7027b356baea7b066da953e5
4
- data.tar.gz: 110e0710a84ef544a4874cf868ec1662dfc900077d1894b24f53bcdaeaeeed34
3
+ metadata.gz: a61fc945e0ef9f5ed6558080931d2acae42cc0401f375275684e4ee32fefe4f7
4
+ data.tar.gz: 4d864fdc0791aa78d8c180b4686ee825cd25e209284fca1966f144813c063280
5
5
  SHA512:
6
- metadata.gz: d95d845c7e8bd183f53b60369415bde86cd766c224bcbc2c52c870c60542a786f359a63eecc4e8829055cee6a1674bc952277749da183432b4b3abae7536efbb
7
- data.tar.gz: 5fa1712fb01118d955d86396aec87b319c75856f6602ccf1a19f475a3dc64dc65a540e35271e6f2c6c7ee6a10c5cb03da30d43945a0dc264c7b0e48f575269d1
6
+ metadata.gz: f3834a9caae693d82837fb9f75141cb35e85f1a2c1439d1bb898f8578d9ae082f46deb0233d2b8a02d6ae6b0bf66862098b47ff571ff3d9a6b874fadaef6d23a
7
+ data.tar.gz: 883f1b5e553fae2be0039aa090d89bf6eb44ec1d0dc31488aeaa727ec8bc2844c9b722568cd7dde0b33c507121afd11e720154d4ff27e66aa3ac3812d5603954
data/README.md CHANGED
@@ -108,7 +108,7 @@ class Person < PEROBS::Object
108
108
  attr_init(:father) do { @store.new(Person, 'Dad') }
109
109
  end
110
110
 
111
- def merry(spouse)
111
+ def marry(spouse)
112
112
  self.spouse = spouse
113
113
  self.status = :married
114
114
  end
data/lib/perobs.rb CHANGED
@@ -28,3 +28,4 @@
28
28
  require "perobs/version"
29
29
  require 'perobs/Store'
30
30
  require 'perobs/ConsoleProgressMeter'
31
+ require 'perobs/FuzzyStringMatcher'
data/lib/perobs/BTree.rb CHANGED
@@ -70,7 +70,7 @@ module PEROBS
70
70
  @nodes.register_custom_data('first_leaf')
71
71
  @nodes.register_custom_data('last_leaf')
72
72
  @nodes.register_custom_data('btree_size')
73
- @node_cache = PersistentObjectCache.new(2**16, -1, BTreeNode, self)
73
+ @node_cache = PersistentObjectCache.new(2**13, 2**13, BTreeNode, self)
74
74
  @root = @first_leaf = @last_leaf = nil
75
75
  @size = 0
76
76
 
@@ -190,7 +190,7 @@ module PEROBS
190
190
  "Number of leave nodes: #{stats.leave_nodes}; " +
191
191
  "Number of leaves: #{stats.leaves}"
192
192
 
193
- !stats.nil?
193
+ true
194
194
  end
195
195
 
196
196
  # Register a new node as root node of the tree.
@@ -59,7 +59,7 @@ module PEROBS
59
59
  # if not
60
60
  def initialize(tree, node_address = nil, parent = nil, is_leaf = true,
61
61
  prev_sibling = nil, next_sibling = nil,
62
- keys = [], values = [], children = [])
62
+ keys = nil, values = nil, children = nil)
63
63
  @tree = tree
64
64
  if node_address == 0
65
65
  PEROBS.log.fatal "Node address may not be 0"
@@ -68,13 +68,13 @@ module PEROBS
68
68
  @parent = link(parent)
69
69
  @prev_sibling = link(prev_sibling)
70
70
  @next_sibling = link(next_sibling)
71
- @keys = keys
71
+ @keys = keys || []
72
72
  if (@is_leaf = is_leaf)
73
- @values = values
74
- @children = []
73
+ @values = values || []
74
+ @children = nil
75
75
  else
76
- @children = children
77
- @values = []
76
+ @children = children || []
77
+ @values = nil
78
78
  end
79
79
  end
80
80
 
@@ -585,11 +585,11 @@ module PEROBS
585
585
  end
586
586
 
587
587
  def trim(idx)
588
- @keys = @keys[0..idx - 1]
588
+ @keys.slice!(idx, @keys.length - idx)
589
589
  if @is_leaf
590
- @values = @values[0..idx - 1]
590
+ @values.slice!(idx, @values.length - idx)
591
591
  else
592
- @children = @children[0..idx]
592
+ @children.slice!(idx + 1, @children.length - idx - 1)
593
593
  end
594
594
  @tree.node_cache.insert(self)
595
595
  end
@@ -654,13 +654,18 @@ module PEROBS
654
654
  # @yield [key, value]
655
655
  # @return [nil or Hash] nil in case of errors or a hash with some
656
656
  # statistical information about the tree
657
- def check
657
+ def check(&block)
658
658
  stats = Stats.new(nil, 0, 0, 0)
659
659
 
660
660
  traverse do |node, position, stack|
661
661
  if position == 0
662
662
  stats.nodes_count += 1
663
663
  if node.parent
664
+ unless node.parent.is_a?(BTreeNodeLink)
665
+ node.error "parent is a #{node.parent.class} instead of a " +
666
+ "BTreeNodeLink"
667
+ return nil
668
+ end
664
669
  # After a split the nodes will only have half the maximum keys.
665
670
  # For branch nodes one of the split nodes will have even 1 key
666
671
  # less as this will become the branch key in a parent node.
@@ -695,6 +700,16 @@ module PEROBS
695
700
  else
696
701
  stats.branch_depth = node.tree_level
697
702
  end
703
+ if node.prev_sibling && !node.prev_sibling.is_a?(BTreeNodeLink)
704
+ node.error "prev_sibling is a #{node.prev_sibling.class} " +
705
+ "instead of a BTreeNodeLink"
706
+ return nil
707
+ end
708
+ if node.next_sibling && !node.next_sibling.is_a?(BTreeNodeLink)
709
+ node.error "next_sibling is a #{node.next_sibling.class} " +
710
+ "instead of a BTreeNodeLink"
711
+ return nil
712
+ end
698
713
  if node.prev_sibling.nil? && @tree.first_leaf != node
699
714
  node.error "Leaf node #{node.node_address} has no previous " +
700
715
  "sibling but is not the first leaf of the tree"
@@ -708,9 +723,9 @@ module PEROBS
708
723
  unless node.keys.size == node.values.size
709
724
  node.error "Key count (#{node.keys.size}) and value " +
710
725
  "count (#{node.values.size}) don't match"
711
- return nil
726
+ return nil
712
727
  end
713
- unless node.children.empty?
728
+ unless node.children.nil?
714
729
  node.error "@children must be nil for a leaf node"
715
730
  return nil
716
731
  end
@@ -718,14 +733,14 @@ module PEROBS
718
733
  stats.leave_nodes += 1
719
734
  stats.leaves += node.keys.length
720
735
  else
721
- unless node.values.empty?
736
+ unless node.values.nil?
722
737
  node.error "@values must be nil for a branch node"
723
738
  return nil
724
739
  end
725
740
  unless node.children.size == node.keys.size + 1
726
741
  node.error "Key count (#{node.keys.size}) must be one " +
727
742
  "less than children count (#{node.children.size})"
728
- return nil
743
+ return nil
729
744
  end
730
745
  node.children.each_with_index do |child, i|
731
746
  unless child.is_a?(BTreeNodeLink)
@@ -789,7 +804,9 @@ module PEROBS
789
804
  else
790
805
  if block_given?
791
806
  # If a block was given, call this block with the key and value.
792
- return nil unless yield(node.keys[index], node.values[index])
807
+ unless yield(node.keys[index], node.values[index])
808
+ return nil
809
+ end
793
810
  end
794
811
  end
795
812
  end
@@ -293,7 +293,7 @@ module PEROBS
293
293
  header = FlatFileBlobHeader.read(@f, addr, id)
294
294
  if header.id != id
295
295
  PEROBS.log.fatal "Database index corrupted: Index for object " +
296
- "#{id} points to object with ID #{header.id}"
296
+ "#{id} points to object with ID #{header.id} at address #{addr}"
297
297
  end
298
298
 
299
299
  buf = nil
@@ -302,7 +302,8 @@ module PEROBS
302
302
  @f.seek(addr + FlatFileBlobHeader::LENGTH)
303
303
  buf = @f.read(header.length)
304
304
  rescue IOError => e
305
- PEROBS.log.fatal "Cannot read blob for ID #{id}: #{e.message}"
305
+ PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
306
+ e.message
306
307
  end
307
308
 
308
309
  # Uncompress the data if the compression bit is set in the flags byte.
@@ -311,12 +312,13 @@ module PEROBS
311
312
  buf = Zlib.inflate(buf)
312
313
  rescue Zlib::BufError, Zlib::DataError
313
314
  PEROBS.log.fatal "Corrupted compressed block with ID " +
314
- "#{header.id} found."
315
+ "#{id} found at address #{addr}."
315
316
  end
316
317
  end
317
318
 
318
319
  if checksum(buf) != header.crc
319
- PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
320
+ PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
321
+ "at address #{addr}"
320
322
  end
321
323
 
322
324
  buf
@@ -339,7 +341,7 @@ module PEROBS
339
341
  if @marks
340
342
  @marks.clear
341
343
  else
342
- @marks = IDList.new(@db_dir, 'marks', 8)
344
+ @marks = IDList.new(@db_dir, 'marks', item_counter)
343
345
  end
344
346
  end
345
347
 
@@ -452,16 +454,14 @@ module PEROBS
452
454
  regenerate_index_and_spaces
453
455
  end
454
456
 
455
- # Check (and repair) the FlatFile.
456
- # @param repair [Boolean] True if errors should be fixed.
457
+ # Check the FlatFile.
457
458
  # @return [Integer] Number of errors found
458
- def check(repair = false)
459
+ def check()
459
460
  errors = 0
460
461
  return errors unless @f
461
462
 
462
463
  t = Time.now
463
- PEROBS.log.info "Checking FlatFile database" +
464
- "#{repair ? ' in repair mode' : ''}..."
464
+ PEROBS.log.info "Checking FlatFile database..."
465
465
 
466
466
  # First check the database blob file. Each entry should be readable and
467
467
  # correct and all IDs must be unique. We use a shadow index to keep
@@ -483,7 +483,6 @@ module PEROBS
483
483
  if buf.bytesize != header.length
484
484
  PEROBS.log.error "Premature end of file in blob with ID " +
485
485
  "#{header.id}."
486
- discard_damaged_blob(header) if repair
487
486
  errors += 1
488
487
  next
489
488
  end
@@ -496,7 +495,6 @@ module PEROBS
496
495
  rescue Zlib::BufError, Zlib::DataError
497
496
  PEROBS.log.error "Corrupted compressed block with ID " +
498
497
  "#{header.id} found."
499
- discard_damaged_blob(header) if repair
500
498
  errors += 1
501
499
  next
502
500
  end
@@ -505,7 +503,6 @@ module PEROBS
505
503
  if header.crc && checksum(buf) != header.crc
506
504
  PEROBS.log.error "Checksum failure while checking blob " +
507
505
  "with ID #{header.id}"
508
- discard_damaged_blob(header) if repair
509
506
  errors += 1
510
507
  next
511
508
  end
@@ -521,22 +518,6 @@ module PEROBS
521
518
  errors += 1
522
519
  previous_header = FlatFileBlobHeader.read(@f, previous_address,
523
520
  header.id)
524
- if repair
525
- # We have two blobs with the same ID and we must discard one of
526
- # them.
527
- if header.is_outdated?
528
- discard_damaged_blob(header)
529
- elsif previous_header.is_outdated?
530
- discard_damaged_blob(previous_header)
531
- else
532
- PEROBS.log.error "None of the blobs with same ID have " +
533
- "the outdated flag set. Deleting the smaller one."
534
- errors += 1
535
- discard_damaged_blob(header.length < previous_header.length ?
536
- header : previous_header)
537
- end
538
- next
539
- end
540
521
  else
541
522
  # ID is unique so far. Add it to the shadow index.
542
523
  new_index.insert(header.id, header.addr)
@@ -553,12 +534,6 @@ module PEROBS
553
534
  PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
554
535
  'bytes found at the end of FlatFile.'
555
536
  corrupted_blobs += 1
556
- if repair
557
- PEROBS.log.error "Truncating FlatFile to " +
558
- "#{end_of_last_healthy_blob} bytes by discarding " +
559
- "#{@f.size - end_of_last_healthy_blob} bytes"
560
- @f.truncate(end_of_last_healthy_blob)
561
- end
562
537
  end
563
538
 
564
539
  errors += corrupted_blobs
@@ -568,17 +543,19 @@ module PEROBS
568
543
  new_index.close
569
544
  new_index.erase
570
545
 
571
- if repair && corrupted_blobs > 0
572
- erase_index_files
573
- defragmentize
574
- regenerate_index_and_spaces
575
- elsif corrupted_blobs == 0
546
+ if corrupted_blobs == 0
576
547
  # Now we check the index data. It must be correct and the entries must
577
548
  # match the blob file. All entries in the index must be in the blob file
578
549
  # and vice versa.
579
550
  begin
580
551
  index_ok = @index.check do |id, address|
581
- has_id_at?(id, address)
552
+ unless has_id_at?(id, address)
553
+ PEROBS.log.error "Index contains an entry for " +
554
+ "ID #{id} at address #{address} that is not in FlatFile"
555
+ false
556
+ else
557
+ true
558
+ end
582
559
  end
583
560
  x_check_errs = 0
584
561
  space_check_ok = true
@@ -586,16 +563,13 @@ module PEROBS
586
563
  (x_check_errs = cross_check_entries) == 0
587
564
  errors += 1 unless index_ok && space_check_ok
588
565
  errors += x_check_errs
589
- regenerate_index_and_spaces if repair
590
566
  end
591
567
  rescue PEROBS::FatalError
592
568
  errors += 1
593
- regenerate_index_and_spaces if repair
594
569
  end
595
570
  end
596
571
 
597
- sync if repair
598
- PEROBS.log.info "check_db completed in #{Time.now - t} seconds. " +
572
+ PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
599
573
  "#{errors} errors found."
600
574
 
601
575
  errors
@@ -687,17 +661,7 @@ module PEROBS
687
661
  header.id)
688
662
  # We have two blobs with the same ID and we must discard one of
689
663
  # them.
690
- if header.is_outdated?
691
- discard_damaged_blob(header)
692
- elsif previous_header.is_outdated?
693
- discard_damaged_blob(previous_header)
694
- else
695
- PEROBS.log.error "None of the blobs with same ID have " +
696
- "the outdated flag set. Deleting the smaller one."
697
- errors += 1
698
- discard_damaged_blob(header.length < previous_header.length ?
699
- header : previous_header)
700
- end
664
+ discard_duplicate_blobs(header, previous_header)
701
665
  else
702
666
  # ID is unique so far. Add it to the shadow index.
703
667
  @index.insert(header.id, header.addr)
@@ -927,6 +891,23 @@ module PEROBS
927
891
  header.clear_flags
928
892
  end
929
893
 
894
+ def discard_duplicate_blobs(header, previous_header)
895
+ if header.is_outdated?
896
+ discard_damaged_blob(header)
897
+ elsif previous_header.is_outdated?
898
+ discard_damaged_blob(previous_header)
899
+ else
900
+ smaller, larger = header.length < previous_header.length ?
901
+ [ header, previous_header ] : [ previous_header, header ]
902
+ PEROBS.log.error "None of the blobs with same ID have " +
903
+ "the outdated flag set. Deleting the smaller one " +
904
+ "at address #{smaller.addr}"
905
+ discard_damaged_blob(smaller)
906
+ @space_list.add_space(smaller.addr, smaller.length)
907
+ @index.insert(larger.id, larger.addr)
908
+ end
909
+ end
910
+
930
911
  def open_index_files(abort_on_missing_files = false)
931
912
  begin
932
913
  @index.open(abort_on_missing_files)
@@ -26,40 +26,42 @@
26
26
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
27
 
28
28
  require 'perobs/Log'
29
- require 'perobs/ObjectBase'
29
+ require 'perobs/Object'
30
30
 
31
31
  module PEROBS
32
32
 
33
33
  # The fuzzy string matcher can be used to perform a fuzzy string search
34
34
  # against a known set of strings. The dictionary of known strings does not
35
- # store the actual strings but references to arbitrary objects. These could
36
- # be the string, but can be something else related to the learned strings.
37
- # To use this class a list of strings with their references must be learned.
38
- # Once the dictionary has been established, fuzzy matches can be done.
39
- class FuzzyStringMatcher
35
+ # store the actual strings but references to String or PEROBS objects.
36
+ # Once the dictionary has been established, fuzzy matches can be done. Since
37
+ # the actual input strings are not directly stored, you cannot remove or
38
+ # modify already stored strings. To remove strings, you have to clear the
39
+ # matcher and add the strings again that you want to keep.
40
+ class FuzzyStringMatcher < PEROBS::Object
41
+
42
+ attr_persist :case_sensitive, :n, :dict
40
43
 
41
44
  # Create a new FuzzyStringMatcher.
42
- # @param store [PEROBS::Store] place to store the dictionary
43
- # @param name [String] Unique name of the string matcher
45
+ # @param p [PEROBS::Store] place to store the dictionary
44
46
  # @param case_sensitive [Boolean] True if case matters for matching
45
47
  # @param n [Integer] Determines what kind of n-gramm is used to store the
46
48
  # references in the dictionary. It also determines the minimum word
47
- # length that can be used for fuzzy matches.
48
- def initialize(store, name, case_sensitive = false, n = 4)
49
- @store = store
50
- @dict_name = "FuzzyStringMatcher::#{name}"
49
+ # length that can be used for fuzzy matches. Values between 2 and
50
+ # 10 are supported. The default is 4.
51
+ def initialize(p, case_sensitive = false, n = 4)
52
+ super(p)
51
53
  if n < 2 || n > 10
52
54
  raise ArgumentError, 'n must be between 2 and 10'
53
55
  end
54
- @case_sensitive = case_sensitive
55
- @n = n
56
+ self.case_sensitive = case_sensitive
57
+ self.n = n
56
58
 
57
- clear unless (@dict = @store[@dict_name])
59
+ clear unless @dict
58
60
  end
59
61
 
60
62
  # Wipe the dictionary.
61
63
  def clear
62
- @store[@dict_name] = @dict = @store.new(BigHash)
64
+ self.dict = @store.new(BigHash)
63
65
  end
64
66
 
65
67
  # Add a string with its reference to the dictionary.
@@ -79,11 +81,8 @@ module PEROBS
79
81
  @dict[n_gramm] = ng_list = @store.new(Hash)
80
82
  end
81
83
 
82
- if ng_list.include?(reference)
83
- ng_list[reference] += 1
84
- else
85
- ng_list[reference] = 0
86
- end
84
+ # We use the Hash as a Set. The value doesn't matter.
85
+ ng_list[reference] = true unless ng_list.include?(reference)
87
86
  end
88
87
 
89
88
  nil
@@ -109,22 +108,12 @@ module PEROBS
109
108
 
110
109
  matches = {}
111
110
 
112
- # This will be the best possible score for a perfect match.
113
- best_possible_score = 0
114
111
  each_n_gramm(string) do |n_gramm|
115
- best_possible_score += 1
116
112
  if (ng_list = @dict[n_gramm])
117
- ng_list.each do |reference, count|
113
+ ng_list.each do |reference, dummy|
118
114
  if matches.include?(reference)
119
115
  matches[reference] += 1
120
116
  else
121
- # We use internally a 10 times larger list so that we don't
122
- # throw away good matches too early. If the max_count value is
123
- # chosen too small there is a risk of not finding the best
124
- # matches!
125
- if matches.size > 10 * max_count
126
- matches = discard_worst_match(matches)
127
- end
128
117
  matches[reference] = 1
129
118
  end
130
119
  end
@@ -133,19 +122,23 @@ module PEROBS
133
122
 
134
123
  return [] if matches.empty?
135
124
 
136
- # Sort in the order of occurance count downwards.
137
- match_list = matches.to_a.sort do |a, b|
138
- b[1] <=> a[1]
139
- end
125
+ match_list = matches.to_a
140
126
 
141
127
  # Set occurrence counters to scores relative to the best possible score.
128
+ # This will be the best possible score for a perfect match.
129
+ best_possible_score = string.length - @n + 1
142
130
  match_list.map! { |a, b| [ a, b.to_f / best_possible_score ] }
143
131
 
144
- # Delete all matches that occured less than half as often than the
145
- # top match.
132
+ # Delete all matches that don't have the required minimum match score.
146
133
  match_list.delete_if { |a| a[1] < min_score }
147
134
 
148
- match_list[0..max_count]
135
+ # Sort the list best to worst match
136
+ match_list.sort! do |a, b|
137
+ b[1] <=> a[1]
138
+ end
139
+
140
+ # Return the top max_count matches.
141
+ match_list[0..max_count - 1]
149
142
  end
150
143
 
151
144
  # Returns some internal stats about the dictionary.
@@ -176,16 +169,6 @@ module PEROBS
176
169
  end
177
170
  end
178
171
 
179
- def discard_worst_match(matches)
180
- # Sort in the order of occurance count downwards.
181
- match_list = matches.to_a.sort do |a, b|
182
- b[1] <=> a[1]
183
- end
184
- # Discard the lowest half of the matches
185
- match_list = match_list[0..match_list.length / 2]
186
- match_list.to_h
187
- end
188
-
189
172
  end
190
173
 
191
174
  end
data/lib/perobs/Hash.rb CHANGED
@@ -124,9 +124,9 @@ module PEROBS
124
124
 
125
125
  # Proxy for assignment method.
126
126
  def []=(key, value)
127
- unless key.is_a?(String)
128
- raise ArgumentError, "PEROBS::Hash[] key must be a String but is a " +
129
- "#{key.class}"
127
+ unless key.is_a?(String) || key.respond_to?(:is_poxreference?)
128
+ raise ArgumentError, "PEROBS::Hash[] key must be a String or " +
129
+ "a PEROBS object but is a #{key.class}"
130
130
  end
131
131
  _check_assignment_value(value)
132
132
  @store.cache.cache_write(self)
@@ -143,18 +143,33 @@ module PEROBS
143
143
  # is referencing.
144
144
  # @return [Array of Integer] IDs of referenced objects
145
145
  def _referenced_object_ids
146
- @data.each_value.select { |v| v && v.respond_to?(:is_poxreference?) }.
147
- map { |o| o.id }
146
+ ids = []
147
+ @data.each do |k, v|
148
+ if k && k.respond_to?(:is_poxreference?)
149
+ ids << k.id
150
+ end
151
+ if v && v.respond_to?(:is_poxreference?)
152
+ ids << v.id
153
+ end
154
+ end
155
+
156
+ ids
148
157
  end
149
158
 
150
159
  # This method should only be used during store repair operations. It will
151
160
  # delete all references to the given object ID.
152
161
  # @param id [Integer] targeted object ID
153
162
  def _delete_reference_to_id(id)
163
+ original_length = @data.length
164
+
154
165
  @data.delete_if do |k, v|
155
- v && v.respond_to?(:is_poxreference?) && v.id == id
166
+ (k && k.respond_to?(:is_poxreference?) && k.id == id) ||
167
+ (v && v.respond_to?(:is_poxreference?) && v.id == id)
168
+ end
169
+
170
+ if @data.length != original_length
171
+ @store.cache.cache_write(self)
156
172
  end
157
- @store.cache.cache_write(self)
158
173
  end
159
174
 
160
175
  # Restore the persistent data from a single data structure.
@@ -163,8 +178,18 @@ module PEROBS
163
178
  # @private
164
179
  def _deserialize(data)
165
180
  @data = {}
166
- data.each { |k, v| @data[k] = v.is_a?(POReference) ?
167
- POXReference.new(@store, v.id) : v }
181
+
182
+ data.each do |k, v|
183
+ # References to other PEROBS Objects are marshalled with our own
184
+ # format. If we detect such a marshalled String we convert it into a
185
+ # POXReference object.
186
+ if (match = /^#<PEROBS::POReference id=([0-9]+)>$/.match(k))
187
+ k = POXReference.new(@store, match[1].to_i)
188
+ end
189
+ dv = v.is_a?(POReference) ? POXReference.new(@store, v.id) : v
190
+ @data[k] = dv
191
+ end
192
+
168
193
  @data
169
194
  end
170
195
 
@@ -185,26 +210,46 @@ module PEROBS
185
210
  data = {}
186
211
 
187
212
  @data.each do |k, v|
188
- if v.respond_to?(:is_poxreference?)
189
- data[k] = POReference.new(v.id)
190
- else
191
- # Outside of the PEROBS library all PEROBS::ObjectBase derived
192
- # objects should not be used directly. The library only exposes them
193
- # via POXReference proxy objects.
194
- if v.is_a?(ObjectBase)
195
- PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
196
- "It is stored in a PEROBS::Hash with key #{k.inspect}. " +
197
- 'Have you used self() instead of myself() to ' +
198
- "get the reference of this PEROBS object?\n" +
199
- v.inspect
200
- end
201
- data[k] = v
213
+ if k.respond_to?(:is_poxreference?)
214
+ # JSON only supports Strings as hash keys. Since JSON is the default
215
+ # internal storage format in the database, we have to marshall
216
+ # PEROBS::Object references ourselves.
217
+ k = "#<PEROBS::POReference id=#{k.id}>"
218
+ elsif k[0..24] == '#<PEROBS::POReference id='
219
+ # This could obviously result in conflicts with 'normal' String hash
220
+ # keys. This is extremely unlikely, but we better catch this case
221
+ # before it causes hard to debug trouble.
222
+ raise ArgumentError, "Hash key #{k} conflicts with PEROBS " +
223
+ "internal representation of marshalled hash keys!"
202
224
  end
225
+ data[k] = serialize_helper(v)
203
226
  end
204
227
 
205
228
  data
206
229
  end
207
230
 
231
+ def serialize_helper(v)
232
+ if v.respond_to?(:is_poxreference?)
233
+ # References to other PEROBS objects (POXReference) are stored as
234
+ # POReference in the database.
235
+ return POReference.new(v.id)
236
+ else
237
+ # Outside of the PEROBS library all PEROBS::ObjectBase derived
238
+ # objects should not be used directly. The library only exposes them
239
+ # via POXReference proxy objects.
240
+ if v.is_a?(ObjectBase)
241
+ PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
242
+ "It is stored in a PEROBS::Hash. " +
243
+ 'Have you used self() instead of myself() to ' +
244
+ "get the reference of this PEROBS object?\n" +
245
+ v.inspect
246
+ end
247
+
248
+ # All other objects are serialized by their native methods.
249
+ return v
250
+ end
251
+ end
252
+
208
253
  end
209
254
 
210
255
  end
@@ -54,7 +54,8 @@ module PEROBS
54
54
  @file_name = File.join(dir, name + '.cache')
55
55
  @page_size = page_size
56
56
  open
57
- @pages = PersistentObjectCache.new(max_in_memory, -1, IDListPage, self)
57
+ @pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
58
+ IDListPage, self)
58
59
  @page_counter = 0
59
60
  end
60
61
 
@@ -102,6 +102,13 @@ module PEROBS
102
102
  end
103
103
  end
104
104
 
105
+ # To allow POXReference objects to be used as Hash keys we need to
106
+ # implement this function. Conveniently, we can just use the PEROBS object
107
+ # ID since that is unique.
108
+ def hash
109
+ @id
110
+ end
111
+
105
112
  # Shortcut to access the _id() method of the referenced object.
106
113
  def _id
107
114
  @id
@@ -54,7 +54,7 @@ module PEROBS
54
54
 
55
55
  # Benchmark runs showed a cache size of 128 to be a good compromise
56
56
  # between read and write performance trade-offs and memory consumption.
57
- @cache = PersistentObjectCache.new(256, -1, SpaceTreeNode, self)
57
+ @cache = PersistentObjectCache.new(256, 256, SpaceTreeNode, self)
58
58
  end
59
59
 
60
60
  # Open the SpaceTree file.
data/lib/perobs/Store.rb CHANGED
@@ -599,6 +599,7 @@ module PEROBS
599
599
  @stats.swept_objects = @db.delete_unmarked_objects do |id|
600
600
  @cache.evict(id)
601
601
  end
602
+ @db.clear_marks
602
603
  GC.start
603
604
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
604
605
  @stats.swept_objects
@@ -1,4 +1,4 @@
1
1
  module PEROBS
2
2
  # The version number
3
- VERSION = "4.2.0"
3
+ VERSION = "4.3.0"
4
4
  end
@@ -265,5 +265,35 @@ describe PEROBS::FlatFileDB do
265
265
  db.close
266
266
  end
267
267
 
268
+ it 'should handle duplicate entries for the same ID in database.blobs file' do
269
+ @store.exit
270
+
271
+ db = PEROBS::FlatFileDB.new(@db_dir)
272
+ db_file = File.join(@db_dir, 'database.blobs')
273
+ db.open
274
+ 0.upto(5) do |i|
275
+ db.put_object("#{i + 1}:#{'X' * (i + 1) * 30}$", i + 1)
276
+ end
277
+ db.close
278
+
279
+ # This appends the entry 2 again
280
+ blob2 = File.read(db_file, 319 - 199, 199)
281
+ File.write(db_file, blob2, File.size(db_file))
282
+
283
+ db.open
284
+ expect(db.check_db).to eql(2)
285
+ expect(db.check_db(true)).to eql(1)
286
+ db.close
287
+ db = PEROBS::FlatFileDB.new(@db_dir, { :log => $stderr,
288
+ :log_level => Logger::WARN })
289
+ db.open
290
+ expect(db.check_db).to eql(0)
291
+
292
+ 0.upto(5) do |i|
293
+ expect(db.get_object(i + 1)).to eql("#{i + 1}:#{'X' * (i + 1) * 30}$")
294
+ end
295
+ db.close
296
+ end
297
+
268
298
  end
269
299
 
@@ -29,13 +29,25 @@ require 'perobs/FuzzyStringMatcher'
29
29
 
30
30
  module PEROBS
31
31
 
32
+ class WordRef < PEROBS::Object
33
+
34
+ attr_persist :word, :line
35
+
36
+ def initialize(store, word, line)
37
+ super(store)
38
+ self.word = word
39
+ self.line = line
40
+ end
41
+
42
+ end
43
+
32
44
  describe FuzzyStringMatcher do
33
45
 
34
46
  before(:all) do
35
47
  @db_name = generate_db_name(__FILE__)
36
48
  @store = PEROBS::Store.new(@db_name)
37
- @fsm = FuzzyStringMatcher.new(@store, 'test')
38
- @fsm2 = FuzzyStringMatcher.new(@store, 'test', true, 2)
49
+ @store['fsm'] = @fsm = @store.new(FuzzyStringMatcher)
50
+ @store['fsm2'] = @fsm2 = @store.new(FuzzyStringMatcher, true, 2)
39
51
  end
40
52
 
41
53
  after(:all) do
@@ -103,6 +115,44 @@ module PEROBS
103
115
  expect(@fsm.best_matches('foobar')).to eql([])
104
116
  end
105
117
 
118
+ it 'should find a match' do
119
+ dut = {
120
+ [ 'one' ] => [ [ 'one', 1.0 ] ],
121
+ [ 'three' ] => [ [ 'three', 1.0 ] ],
122
+ [ 'four' ]=> [ [ 'four', 1.0 ], [ 'fourteen', 0.666 ] ],
123
+ [ 'four', 1.0 ]=> [ [ 'four', 1.0 ] ],
124
+ [ 'even' ] => [ [ 'seven', 0.666 ], [ 'eleven', 0.666 ] ],
125
+ [ 'teen' ] => [ ['thirteen', 0.6666666666666666],
126
+ ['fourteen', 0.6666666666666666],
127
+ ['fifteen', 0.6666666666666666],
128
+ ['sixteen', 0.6666666666666666],
129
+ ['seventeen', 0.6666666666666666],
130
+ ['eighteen', 0.6666666666666666],
131
+ ['nineteen', 0.6666666666666666] ],
132
+ [ 'aight' ] => [ [ 'eight', 0.5 ] ],
133
+ [ 'thirdteen' ] => [ [ 'thirteen', 0.5 ] ],
134
+ [ 'shirt teen', 0.3 ] => [ [ 'thirteen', 0.333 ] ]
135
+ }
136
+ check_data_under_test(@fsm, dut)
137
+ end
138
+
139
+ it 'should sort best to worst matches' do
140
+ @fsm.clear
141
+ %w( xbar xfoox foor bar foobar barfoo foo rab baar fool xbarx
142
+ foobarx xfoobarx foo_bar ).each do |w|
143
+ @fsm.learn(w, w)
144
+ end
145
+ dut = {
146
+ [ 'foo' ] => [["foo", 1.0], ["foor", 0.5], ["foobar", 0.5],
147
+ ["fool", 0.5], ["foobarx", 0.5], ["foo_bar", 0.5],
148
+ ["barfoo", 0.5]],
149
+ [ 'bar' ] => [["bar", 1.0], ["barfoo", 0.5], ["xbar", 0.5],
150
+ ["foobar", 0.5], ["foo_bar", 0.5]],
151
+ [ 'foobar' ] => [["foobar", 1.0], ["foobarx", 0.8], ["xfoobarx", 0.6]]
152
+ }
153
+ check_data_under_test(@fsm, dut)
154
+ end
155
+
106
156
  it 'should handle a larger text' do
107
157
  text =<<-EOT
108
158
  MIT License
@@ -131,9 +181,9 @@ EOT
131
181
  @fsm2.learn(word, word)
132
182
  end
133
183
  stats = @fsm2.stats
134
- expect(stats['dictionary_size']).to eql(363)
184
+ expect(stats['dictionary_size']).to eql(352)
135
185
  expect(stats['max_list_size']).to eql(22)
136
- expect(stats['avg_list_size']).to be_within(0.001).of(2.366)
186
+ expect(stats['avg_list_size']).to be_within(0.001).of(2.409)
137
187
  end
138
188
 
139
189
  it 'should find case sensitive matches' do
@@ -145,6 +195,46 @@ EOT
145
195
  check_data_under_test(@fsm2, dut)
146
196
  end
147
197
 
198
+ it 'should support references to PEROBS objects' do
199
+ text =<<-EOT
200
+ MIT License
201
+
202
+ Permission is hereby granted, free of charge, to any person obtaining
203
+ a copy of this software and associated documentation files (the
204
+ "Software"), to deal in the Software without restriction, including
205
+ without limitation the rights to use, copy, modify, merge, publish,
206
+ distribute, sublicense, and/or sell copies of the Software, and to
207
+ permit persons to whom the Software is furnished to do so, subject to
208
+ the following conditions:
209
+ EOT
210
+
211
+ line_no = 1
212
+ @store['fsm'] = fsm = @store.new(FuzzyStringMatcher)
213
+ @store['refs'] = refs = @store.new(Array)
214
+ text.each_line do |line|
215
+ line.split.each do |word|
216
+ ref = @store.new(WordRef, word, line_no)
217
+ refs << ref
218
+ fsm.learn(word, ref)
219
+ end
220
+ line_no += 1
221
+ end
222
+
223
+ found_lines = []
224
+ fsm.best_matches('SOFTWARE').each do |match|
225
+ found_lines << match[0].line
226
+ end
227
+ expect(found_lines.sort).to eql([ 4, 5, 5, 7, 8 ])
228
+ end
229
+
230
+ it 'should with small search words' do
231
+ @fsm.clear
232
+ mats = 'Yukihiro Matsumoto'
233
+ @fsm.learn(mats)
234
+ expect(@fsm.best_matches('Yukihiro').first.first).to eql(mats)
235
+ expect(@fsm.best_matches('Mats', 0.3).first.first).to eql(mats)
236
+ end
237
+
148
238
  def check_data_under_test(fsm, dut)
149
239
  dut.each do |inputs, reference|
150
240
  key = inputs[0]
data/test/Hash_spec.rb CHANGED
@@ -31,7 +31,6 @@ require 'spec_helper'
31
31
 
32
32
  require 'perobs'
33
33
 
34
-
35
34
  class PO < PEROBS::Object
36
35
 
37
36
  attr_persist :name
@@ -68,9 +67,13 @@ describe PEROBS::Hash do
68
67
  h['po'] = po = @store.new(PO)
69
68
  po.name = 'foobar'
70
69
  h['b'] = 'B'
70
+ @store['po_key'] = po_key = @store.new(PO)
71
+ po_key.name = 'po key'
72
+ h[po_key] = 'PO Key'
71
73
 
72
74
  expect(h['a']).to eq('A')
73
75
  expect(h['b']).to eq('B')
76
+ expect(h[@store['po_key']]).to eq('PO Key')
74
77
  @store.exit
75
78
 
76
79
  @store = PEROBS::Store.new(@db_name)
@@ -78,6 +81,14 @@ describe PEROBS::Hash do
78
81
  expect(h['a']).to eq('A')
79
82
  expect(h['b']).to eq('B')
80
83
  expect(h['po'].name).to eq('foobar')
84
+ po_key = @store['po_key']
85
+ expect(po_key.name).to eq('po key')
86
+ expect(h[po_key]).to eq('PO Key')
87
+ end
88
+
89
+ it 'should not allow hash keys that conflict with internal notations' do
90
+ @store['h'] = h = @store.new(PEROBS::Hash)
91
+ expect { h['#<PEROBS::POReference id=1234>'] = 'foo'; @store.sync }.to raise_error(ArgumentError)
81
92
  end
82
93
 
83
94
  it 'should have an each method to iterate' do
data/test/Store_spec.rb CHANGED
@@ -251,6 +251,20 @@ describe PEROBS::Store do
251
251
  end
252
252
  expect(i).to eq(6)
253
253
 
254
+ capture_io { store.gc }
255
+ capture_io { expect { store.check }.to_not raise_error }
256
+ capture_io { store.exit }
257
+
258
+ store = PEROBS::Store.new(@db_file)
259
+ capture_io { expect { store.check }.to_not raise_error }
260
+
261
+ person = store['person1']
262
+ i = 0
263
+ while (person = person.related) do
264
+ i += 1
265
+ end
266
+ expect(i).to eq(6)
267
+
254
268
  capture_io { store.gc }
255
269
  capture_io { expect { store.check }.to_not raise_error }
256
270
  expect { store.delete_store }.to_not raise_error
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: perobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.0
4
+ version: 4.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Schlaeger
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-02 00:00:00.000000000 Z
11
+ date: 2021-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -147,7 +147,7 @@ homepage: https://github.com/scrapper/perobs
147
147
  licenses:
148
148
  - MIT
149
149
  metadata: {}
150
- post_install_message:
150
+ post_install_message:
151
151
  rdoc_options: []
152
152
  require_paths:
153
153
  - lib
@@ -162,9 +162,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
162
  - !ruby/object:Gem::Version
163
163
  version: '0'
164
164
  requirements: []
165
- rubyforge_project:
166
- rubygems_version: 2.7.6.2
167
- signing_key:
165
+ rubygems_version: 3.2.3
166
+ signing_key:
168
167
  specification_version: 4
169
168
  summary: Persistent Ruby Object Store
170
169
  test_files: