perobs 4.2.0 → 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7790ee42586bb2b8fca115f93ed277c4a8057f7a7027b356baea7b066da953e5
4
- data.tar.gz: 110e0710a84ef544a4874cf868ec1662dfc900077d1894b24f53bcdaeaeeed34
3
+ metadata.gz: a61fc945e0ef9f5ed6558080931d2acae42cc0401f375275684e4ee32fefe4f7
4
+ data.tar.gz: 4d864fdc0791aa78d8c180b4686ee825cd25e209284fca1966f144813c063280
5
5
  SHA512:
6
- metadata.gz: d95d845c7e8bd183f53b60369415bde86cd766c224bcbc2c52c870c60542a786f359a63eecc4e8829055cee6a1674bc952277749da183432b4b3abae7536efbb
7
- data.tar.gz: 5fa1712fb01118d955d86396aec87b319c75856f6602ccf1a19f475a3dc64dc65a540e35271e6f2c6c7ee6a10c5cb03da30d43945a0dc264c7b0e48f575269d1
6
+ metadata.gz: f3834a9caae693d82837fb9f75141cb35e85f1a2c1439d1bb898f8578d9ae082f46deb0233d2b8a02d6ae6b0bf66862098b47ff571ff3d9a6b874fadaef6d23a
7
+ data.tar.gz: 883f1b5e553fae2be0039aa090d89bf6eb44ec1d0dc31488aeaa727ec8bc2844c9b722568cd7dde0b33c507121afd11e720154d4ff27e66aa3ac3812d5603954
data/README.md CHANGED
@@ -108,7 +108,7 @@ class Person < PEROBS::Object
108
108
  attr_init(:father) do { @store.new(Person, 'Dad') }
109
109
  end
110
110
 
111
- def merry(spouse)
111
+ def marry(spouse)
112
112
  self.spouse = spouse
113
113
  self.status = :married
114
114
  end
data/lib/perobs.rb CHANGED
@@ -28,3 +28,4 @@
28
28
  require "perobs/version"
29
29
  require 'perobs/Store'
30
30
  require 'perobs/ConsoleProgressMeter'
31
+ require 'perobs/FuzzyStringMatcher'
data/lib/perobs/BTree.rb CHANGED
@@ -70,7 +70,7 @@ module PEROBS
70
70
  @nodes.register_custom_data('first_leaf')
71
71
  @nodes.register_custom_data('last_leaf')
72
72
  @nodes.register_custom_data('btree_size')
73
- @node_cache = PersistentObjectCache.new(2**16, -1, BTreeNode, self)
73
+ @node_cache = PersistentObjectCache.new(2**13, 2**13, BTreeNode, self)
74
74
  @root = @first_leaf = @last_leaf = nil
75
75
  @size = 0
76
76
 
@@ -190,7 +190,7 @@ module PEROBS
190
190
  "Number of leave nodes: #{stats.leave_nodes}; " +
191
191
  "Number of leaves: #{stats.leaves}"
192
192
 
193
- !stats.nil?
193
+ true
194
194
  end
195
195
 
196
196
  # Register a new node as root node of the tree.
@@ -59,7 +59,7 @@ module PEROBS
59
59
  # if not
60
60
  def initialize(tree, node_address = nil, parent = nil, is_leaf = true,
61
61
  prev_sibling = nil, next_sibling = nil,
62
- keys = [], values = [], children = [])
62
+ keys = nil, values = nil, children = nil)
63
63
  @tree = tree
64
64
  if node_address == 0
65
65
  PEROBS.log.fatal "Node address may not be 0"
@@ -68,13 +68,13 @@ module PEROBS
68
68
  @parent = link(parent)
69
69
  @prev_sibling = link(prev_sibling)
70
70
  @next_sibling = link(next_sibling)
71
- @keys = keys
71
+ @keys = keys || []
72
72
  if (@is_leaf = is_leaf)
73
- @values = values
74
- @children = []
73
+ @values = values || []
74
+ @children = nil
75
75
  else
76
- @children = children
77
- @values = []
76
+ @children = children || []
77
+ @values = nil
78
78
  end
79
79
  end
80
80
 
@@ -585,11 +585,11 @@ module PEROBS
585
585
  end
586
586
 
587
587
  def trim(idx)
588
- @keys = @keys[0..idx - 1]
588
+ @keys.slice!(idx, @keys.length - idx)
589
589
  if @is_leaf
590
- @values = @values[0..idx - 1]
590
+ @values.slice!(idx, @values.length - idx)
591
591
  else
592
- @children = @children[0..idx]
592
+ @children.slice!(idx + 1, @children.length - idx - 1)
593
593
  end
594
594
  @tree.node_cache.insert(self)
595
595
  end
@@ -654,13 +654,18 @@ module PEROBS
654
654
  # @yield [key, value]
655
655
  # @return [nil or Hash] nil in case of errors or a hash with some
656
656
  # statistical information about the tree
657
- def check
657
+ def check(&block)
658
658
  stats = Stats.new(nil, 0, 0, 0)
659
659
 
660
660
  traverse do |node, position, stack|
661
661
  if position == 0
662
662
  stats.nodes_count += 1
663
663
  if node.parent
664
+ unless node.parent.is_a?(BTreeNodeLink)
665
+ node.error "parent is a #{node.parent.class} instead of a " +
666
+ "BTreeNodeLink"
667
+ return nil
668
+ end
664
669
  # After a split the nodes will only have half the maximum keys.
665
670
  # For branch nodes one of the split nodes will have even 1 key
666
671
  # less as this will become the branch key in a parent node.
@@ -695,6 +700,16 @@ module PEROBS
695
700
  else
696
701
  stats.branch_depth = node.tree_level
697
702
  end
703
+ if node.prev_sibling && !node.prev_sibling.is_a?(BTreeNodeLink)
704
+ node.error "prev_sibling is a #{node.prev_sibling.class} " +
705
+ "instead of a BTreeNodeLink"
706
+ return nil
707
+ end
708
+ if node.next_sibling && !node.next_sibling.is_a?(BTreeNodeLink)
709
+ node.error "next_sibling is a #{node.next_sibling.class} " +
710
+ "instead of a BTreeNodeLink"
711
+ return nil
712
+ end
698
713
  if node.prev_sibling.nil? && @tree.first_leaf != node
699
714
  node.error "Leaf node #{node.node_address} has no previous " +
700
715
  "sibling but is not the first leaf of the tree"
@@ -708,9 +723,9 @@ module PEROBS
708
723
  unless node.keys.size == node.values.size
709
724
  node.error "Key count (#{node.keys.size}) and value " +
710
725
  "count (#{node.values.size}) don't match"
711
- return nil
726
+ return nil
712
727
  end
713
- unless node.children.empty?
728
+ unless node.children.nil?
714
729
  node.error "@children must be nil for a leaf node"
715
730
  return nil
716
731
  end
@@ -718,14 +733,14 @@ module PEROBS
718
733
  stats.leave_nodes += 1
719
734
  stats.leaves += node.keys.length
720
735
  else
721
- unless node.values.empty?
736
+ unless node.values.nil?
722
737
  node.error "@values must be nil for a branch node"
723
738
  return nil
724
739
  end
725
740
  unless node.children.size == node.keys.size + 1
726
741
  node.error "Key count (#{node.keys.size}) must be one " +
727
742
  "less than children count (#{node.children.size})"
728
- return nil
743
+ return nil
729
744
  end
730
745
  node.children.each_with_index do |child, i|
731
746
  unless child.is_a?(BTreeNodeLink)
@@ -789,7 +804,9 @@ module PEROBS
789
804
  else
790
805
  if block_given?
791
806
  # If a block was given, call this block with the key and value.
792
- return nil unless yield(node.keys[index], node.values[index])
807
+ unless yield(node.keys[index], node.values[index])
808
+ return nil
809
+ end
793
810
  end
794
811
  end
795
812
  end
@@ -293,7 +293,7 @@ module PEROBS
293
293
  header = FlatFileBlobHeader.read(@f, addr, id)
294
294
  if header.id != id
295
295
  PEROBS.log.fatal "Database index corrupted: Index for object " +
296
- "#{id} points to object with ID #{header.id}"
296
+ "#{id} points to object with ID #{header.id} at address #{addr}"
297
297
  end
298
298
 
299
299
  buf = nil
@@ -302,7 +302,8 @@ module PEROBS
302
302
  @f.seek(addr + FlatFileBlobHeader::LENGTH)
303
303
  buf = @f.read(header.length)
304
304
  rescue IOError => e
305
- PEROBS.log.fatal "Cannot read blob for ID #{id}: #{e.message}"
305
+ PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
306
+ e.message
306
307
  end
307
308
 
308
309
  # Uncompress the data if the compression bit is set in the flags byte.
@@ -311,12 +312,13 @@ module PEROBS
311
312
  buf = Zlib.inflate(buf)
312
313
  rescue Zlib::BufError, Zlib::DataError
313
314
  PEROBS.log.fatal "Corrupted compressed block with ID " +
314
- "#{header.id} found."
315
+ "#{id} found at address #{addr}."
315
316
  end
316
317
  end
317
318
 
318
319
  if checksum(buf) != header.crc
319
- PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
320
+ PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
321
+ "at address #{addr}"
320
322
  end
321
323
 
322
324
  buf
@@ -339,7 +341,7 @@ module PEROBS
339
341
  if @marks
340
342
  @marks.clear
341
343
  else
342
- @marks = IDList.new(@db_dir, 'marks', 8)
344
+ @marks = IDList.new(@db_dir, 'marks', item_counter)
343
345
  end
344
346
  end
345
347
 
@@ -452,16 +454,14 @@ module PEROBS
452
454
  regenerate_index_and_spaces
453
455
  end
454
456
 
455
- # Check (and repair) the FlatFile.
456
- # @param repair [Boolean] True if errors should be fixed.
457
+ # Check the FlatFile.
457
458
  # @return [Integer] Number of errors found
458
- def check(repair = false)
459
+ def check()
459
460
  errors = 0
460
461
  return errors unless @f
461
462
 
462
463
  t = Time.now
463
- PEROBS.log.info "Checking FlatFile database" +
464
- "#{repair ? ' in repair mode' : ''}..."
464
+ PEROBS.log.info "Checking FlatFile database..."
465
465
 
466
466
  # First check the database blob file. Each entry should be readable and
467
467
  # correct and all IDs must be unique. We use a shadow index to keep
@@ -483,7 +483,6 @@ module PEROBS
483
483
  if buf.bytesize != header.length
484
484
  PEROBS.log.error "Premature end of file in blob with ID " +
485
485
  "#{header.id}."
486
- discard_damaged_blob(header) if repair
487
486
  errors += 1
488
487
  next
489
488
  end
@@ -496,7 +495,6 @@ module PEROBS
496
495
  rescue Zlib::BufError, Zlib::DataError
497
496
  PEROBS.log.error "Corrupted compressed block with ID " +
498
497
  "#{header.id} found."
499
- discard_damaged_blob(header) if repair
500
498
  errors += 1
501
499
  next
502
500
  end
@@ -505,7 +503,6 @@ module PEROBS
505
503
  if header.crc && checksum(buf) != header.crc
506
504
  PEROBS.log.error "Checksum failure while checking blob " +
507
505
  "with ID #{header.id}"
508
- discard_damaged_blob(header) if repair
509
506
  errors += 1
510
507
  next
511
508
  end
@@ -521,22 +518,6 @@ module PEROBS
521
518
  errors += 1
522
519
  previous_header = FlatFileBlobHeader.read(@f, previous_address,
523
520
  header.id)
524
- if repair
525
- # We have two blobs with the same ID and we must discard one of
526
- # them.
527
- if header.is_outdated?
528
- discard_damaged_blob(header)
529
- elsif previous_header.is_outdated?
530
- discard_damaged_blob(previous_header)
531
- else
532
- PEROBS.log.error "None of the blobs with same ID have " +
533
- "the outdated flag set. Deleting the smaller one."
534
- errors += 1
535
- discard_damaged_blob(header.length < previous_header.length ?
536
- header : previous_header)
537
- end
538
- next
539
- end
540
521
  else
541
522
  # ID is unique so far. Add it to the shadow index.
542
523
  new_index.insert(header.id, header.addr)
@@ -553,12 +534,6 @@ module PEROBS
553
534
  PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
554
535
  'bytes found at the end of FlatFile.'
555
536
  corrupted_blobs += 1
556
- if repair
557
- PEROBS.log.error "Truncating FlatFile to " +
558
- "#{end_of_last_healthy_blob} bytes by discarding " +
559
- "#{@f.size - end_of_last_healthy_blob} bytes"
560
- @f.truncate(end_of_last_healthy_blob)
561
- end
562
537
  end
563
538
 
564
539
  errors += corrupted_blobs
@@ -568,17 +543,19 @@ module PEROBS
568
543
  new_index.close
569
544
  new_index.erase
570
545
 
571
- if repair && corrupted_blobs > 0
572
- erase_index_files
573
- defragmentize
574
- regenerate_index_and_spaces
575
- elsif corrupted_blobs == 0
546
+ if corrupted_blobs == 0
576
547
  # Now we check the index data. It must be correct and the entries must
577
548
  # match the blob file. All entries in the index must be in the blob file
578
549
  # and vice versa.
579
550
  begin
580
551
  index_ok = @index.check do |id, address|
581
- has_id_at?(id, address)
552
+ unless has_id_at?(id, address)
553
+ PEROBS.log.error "Index contains an entry for " +
554
+ "ID #{id} at address #{address} that is not in FlatFile"
555
+ false
556
+ else
557
+ true
558
+ end
582
559
  end
583
560
  x_check_errs = 0
584
561
  space_check_ok = true
@@ -586,16 +563,13 @@ module PEROBS
586
563
  (x_check_errs = cross_check_entries) == 0
587
564
  errors += 1 unless index_ok && space_check_ok
588
565
  errors += x_check_errs
589
- regenerate_index_and_spaces if repair
590
566
  end
591
567
  rescue PEROBS::FatalError
592
568
  errors += 1
593
- regenerate_index_and_spaces if repair
594
569
  end
595
570
  end
596
571
 
597
- sync if repair
598
- PEROBS.log.info "check_db completed in #{Time.now - t} seconds. " +
572
+ PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
599
573
  "#{errors} errors found."
600
574
 
601
575
  errors
@@ -687,17 +661,7 @@ module PEROBS
687
661
  header.id)
688
662
  # We have two blobs with the same ID and we must discard one of
689
663
  # them.
690
- if header.is_outdated?
691
- discard_damaged_blob(header)
692
- elsif previous_header.is_outdated?
693
- discard_damaged_blob(previous_header)
694
- else
695
- PEROBS.log.error "None of the blobs with same ID have " +
696
- "the outdated flag set. Deleting the smaller one."
697
- errors += 1
698
- discard_damaged_blob(header.length < previous_header.length ?
699
- header : previous_header)
700
- end
664
+ discard_duplicate_blobs(header, previous_header)
701
665
  else
702
666
  # ID is unique so far. Add it to the shadow index.
703
667
  @index.insert(header.id, header.addr)
@@ -927,6 +891,23 @@ module PEROBS
927
891
  header.clear_flags
928
892
  end
929
893
 
894
+ def discard_duplicate_blobs(header, previous_header)
895
+ if header.is_outdated?
896
+ discard_damaged_blob(header)
897
+ elsif previous_header.is_outdated?
898
+ discard_damaged_blob(previous_header)
899
+ else
900
+ smaller, larger = header.length < previous_header.length ?
901
+ [ header, previous_header ] : [ previous_header, header ]
902
+ PEROBS.log.error "None of the blobs with same ID have " +
903
+ "the outdated flag set. Deleting the smaller one " +
904
+ "at address #{smaller.addr}"
905
+ discard_damaged_blob(smaller)
906
+ @space_list.add_space(smaller.addr, smaller.length)
907
+ @index.insert(larger.id, larger.addr)
908
+ end
909
+ end
910
+
930
911
  def open_index_files(abort_on_missing_files = false)
931
912
  begin
932
913
  @index.open(abort_on_missing_files)
@@ -26,40 +26,42 @@
26
26
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
27
 
28
28
  require 'perobs/Log'
29
- require 'perobs/ObjectBase'
29
+ require 'perobs/Object'
30
30
 
31
31
  module PEROBS
32
32
 
33
33
  # The fuzzy string matcher can be used to perform a fuzzy string search
34
34
  # against a known set of strings. The dictionary of known strings does not
35
- # store the actual strings but references to arbitrary objects. These could
36
- # be the string, but can be something else related to the learned strings.
37
- # To use this class a list of strings with their references must be learned.
38
- # Once the dictionary has been established, fuzzy matches can be done.
39
- class FuzzyStringMatcher
35
+ # store the actual strings but references to String or PEROBS objects.
36
+ # Once the dictionary has been established, fuzzy matches can be done. Since
37
+ # the actual input strings are not directly stored, you cannot remove or
38
+ # modify already stored strings. To remove strings, you have to clear the
39
+ # matcher and add the strings again that you want to keep.
40
+ class FuzzyStringMatcher < PEROBS::Object
41
+
42
+ attr_persist :case_sensitive, :n, :dict
40
43
 
41
44
  # Create a new FuzzyStringMatcher.
42
- # @param store [PEROBS::Store] place to store the dictionary
43
- # @param name [String] Unique name of the string matcher
45
+ # @param p [PEROBS::Store] place to store the dictionary
44
46
  # @param case_sensitive [Boolean] True if case matters for matching
45
47
  # @param n [Integer] Determines what kind of n-gramm is used to store the
46
48
  # references in the dictionary. It also determines the minimum word
47
- # length that can be used for fuzzy matches.
48
- def initialize(store, name, case_sensitive = false, n = 4)
49
- @store = store
50
- @dict_name = "FuzzyStringMatcher::#{name}"
49
+ # length that can be used for fuzzy matches. Values between 2 and
50
+ # 10 are supported. The default is 4.
51
+ def initialize(p, case_sensitive = false, n = 4)
52
+ super(p)
51
53
  if n < 2 || n > 10
52
54
  raise ArgumentError, 'n must be between 2 and 10'
53
55
  end
54
- @case_sensitive = case_sensitive
55
- @n = n
56
+ self.case_sensitive = case_sensitive
57
+ self.n = n
56
58
 
57
- clear unless (@dict = @store[@dict_name])
59
+ clear unless @dict
58
60
  end
59
61
 
60
62
  # Wipe the dictionary.
61
63
  def clear
62
- @store[@dict_name] = @dict = @store.new(BigHash)
64
+ self.dict = @store.new(BigHash)
63
65
  end
64
66
 
65
67
  # Add a string with its reference to the dictionary.
@@ -79,11 +81,8 @@ module PEROBS
79
81
  @dict[n_gramm] = ng_list = @store.new(Hash)
80
82
  end
81
83
 
82
- if ng_list.include?(reference)
83
- ng_list[reference] += 1
84
- else
85
- ng_list[reference] = 0
86
- end
84
+ # We use the Hash as a Set. The value doesn't matter.
85
+ ng_list[reference] = true unless ng_list.include?(reference)
87
86
  end
88
87
 
89
88
  nil
@@ -109,22 +108,12 @@ module PEROBS
109
108
 
110
109
  matches = {}
111
110
 
112
- # This will be the best possible score for a perfect match.
113
- best_possible_score = 0
114
111
  each_n_gramm(string) do |n_gramm|
115
- best_possible_score += 1
116
112
  if (ng_list = @dict[n_gramm])
117
- ng_list.each do |reference, count|
113
+ ng_list.each do |reference, dummy|
118
114
  if matches.include?(reference)
119
115
  matches[reference] += 1
120
116
  else
121
- # We use internally a 10 times larger list so that we don't
122
- # throw away good matches too early. If the max_count value is
123
- # chosen too small there is a risk of not finding the best
124
- # matches!
125
- if matches.size > 10 * max_count
126
- matches = discard_worst_match(matches)
127
- end
128
117
  matches[reference] = 1
129
118
  end
130
119
  end
@@ -133,19 +122,23 @@ module PEROBS
133
122
 
134
123
  return [] if matches.empty?
135
124
 
136
- # Sort in the order of occurance count downwards.
137
- match_list = matches.to_a.sort do |a, b|
138
- b[1] <=> a[1]
139
- end
125
+ match_list = matches.to_a
140
126
 
141
127
  # Set occurrence counters to scores relative to the best possible score.
128
+ # This will be the best possible score for a perfect match.
129
+ best_possible_score = string.length - @n + 1
142
130
  match_list.map! { |a, b| [ a, b.to_f / best_possible_score ] }
143
131
 
144
- # Delete all matches that occured less than half as often than the
145
- # top match.
132
+ # Delete all matches that don't have the required minimum match score.
146
133
  match_list.delete_if { |a| a[1] < min_score }
147
134
 
148
- match_list[0..max_count]
135
+ # Sort the list best to worst match
136
+ match_list.sort! do |a, b|
137
+ b[1] <=> a[1]
138
+ end
139
+
140
+ # Return the top max_count matches.
141
+ match_list[0..max_count - 1]
149
142
  end
150
143
 
151
144
  # Returns some internal stats about the dictionary.
@@ -176,16 +169,6 @@ module PEROBS
176
169
  end
177
170
  end
178
171
 
179
- def discard_worst_match(matches)
180
- # Sort in the order of occurance count downwards.
181
- match_list = matches.to_a.sort do |a, b|
182
- b[1] <=> a[1]
183
- end
184
- # Discard the lowest half of the matches
185
- match_list = match_list[0..match_list.length / 2]
186
- match_list.to_h
187
- end
188
-
189
172
  end
190
173
 
191
174
  end
data/lib/perobs/Hash.rb CHANGED
@@ -124,9 +124,9 @@ module PEROBS
124
124
 
125
125
  # Proxy for assignment method.
126
126
  def []=(key, value)
127
- unless key.is_a?(String)
128
- raise ArgumentError, "PEROBS::Hash[] key must be a String but is a " +
129
- "#{key.class}"
127
+ unless key.is_a?(String) || key.respond_to?(:is_poxreference?)
128
+ raise ArgumentError, "PEROBS::Hash[] key must be a String or " +
129
+ "a PEROBS object but is a #{key.class}"
130
130
  end
131
131
  _check_assignment_value(value)
132
132
  @store.cache.cache_write(self)
@@ -143,18 +143,33 @@ module PEROBS
143
143
  # is referencing.
144
144
  # @return [Array of Integer] IDs of referenced objects
145
145
  def _referenced_object_ids
146
- @data.each_value.select { |v| v && v.respond_to?(:is_poxreference?) }.
147
- map { |o| o.id }
146
+ ids = []
147
+ @data.each do |k, v|
148
+ if k && k.respond_to?(:is_poxreference?)
149
+ ids << k.id
150
+ end
151
+ if v && v.respond_to?(:is_poxreference?)
152
+ ids << v.id
153
+ end
154
+ end
155
+
156
+ ids
148
157
  end
149
158
 
150
159
  # This method should only be used during store repair operations. It will
151
160
  # delete all references to the given object ID.
152
161
  # @param id [Integer] targeted object ID
153
162
  def _delete_reference_to_id(id)
163
+ original_length = @data.length
164
+
154
165
  @data.delete_if do |k, v|
155
- v && v.respond_to?(:is_poxreference?) && v.id == id
166
+ (k && k.respond_to?(:is_poxreference?) && k.id == id) ||
167
+ (v && v.respond_to?(:is_poxreference?) && v.id == id)
168
+ end
169
+
170
+ if @data.length != original_length
171
+ @store.cache.cache_write(self)
156
172
  end
157
- @store.cache.cache_write(self)
158
173
  end
159
174
 
160
175
  # Restore the persistent data from a single data structure.
@@ -163,8 +178,18 @@ module PEROBS
163
178
  # @private
164
179
  def _deserialize(data)
165
180
  @data = {}
166
- data.each { |k, v| @data[k] = v.is_a?(POReference) ?
167
- POXReference.new(@store, v.id) : v }
181
+
182
+ data.each do |k, v|
183
+ # References to other PEROBS Objects are marshalled with our own
184
+ # format. If we detect such a marshalled String we convert it into a
185
+ # POXReference object.
186
+ if (match = /^#<PEROBS::POReference id=([0-9]+)>$/.match(k))
187
+ k = POXReference.new(@store, match[1].to_i)
188
+ end
189
+ dv = v.is_a?(POReference) ? POXReference.new(@store, v.id) : v
190
+ @data[k] = dv
191
+ end
192
+
168
193
  @data
169
194
  end
170
195
 
@@ -185,26 +210,46 @@ module PEROBS
185
210
  data = {}
186
211
 
187
212
  @data.each do |k, v|
188
- if v.respond_to?(:is_poxreference?)
189
- data[k] = POReference.new(v.id)
190
- else
191
- # Outside of the PEROBS library all PEROBS::ObjectBase derived
192
- # objects should not be used directly. The library only exposes them
193
- # via POXReference proxy objects.
194
- if v.is_a?(ObjectBase)
195
- PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
196
- "It is stored in a PEROBS::Hash with key #{k.inspect}. " +
197
- 'Have you used self() instead of myself() to ' +
198
- "get the reference of this PEROBS object?\n" +
199
- v.inspect
200
- end
201
- data[k] = v
213
+ if k.respond_to?(:is_poxreference?)
214
+ # JSON only supports Strings as hash keys. Since JSON is the default
215
+ # internal storage format in the database, we have to marshall
216
+ # PEROBS::Object references ourselves.
217
+ k = "#<PEROBS::POReference id=#{k.id}>"
218
+ elsif k[0..24] == '#<PEROBS::POReference id='
219
+ # This could obviously result in conflicts with 'normal' String hash
220
+ # keys. This is extremely unlikely, but we better catch this case
221
+ # before it causes hard to debug trouble.
222
+ raise ArgumentError, "Hash key #{k} conflicts with PEROBS " +
223
+ "internal representation of marshalled hash keys!"
202
224
  end
225
+ data[k] = serialize_helper(v)
203
226
  end
204
227
 
205
228
  data
206
229
  end
207
230
 
231
+ def serialize_helper(v)
232
+ if v.respond_to?(:is_poxreference?)
233
+ # References to other PEROBS objects (POXReference) are stored as
234
+ # POReference in the database.
235
+ return POReference.new(v.id)
236
+ else
237
+ # Outside of the PEROBS library all PEROBS::ObjectBase derived
238
+ # objects should not be used directly. The library only exposes them
239
+ # via POXReference proxy objects.
240
+ if v.is_a?(ObjectBase)
241
+ PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
242
+ "It is stored in a PEROBS::Hash. " +
243
+ 'Have you used self() instead of myself() to ' +
244
+ "get the reference of this PEROBS object?\n" +
245
+ v.inspect
246
+ end
247
+
248
+ # All other objects are serialized by their native methods.
249
+ return v
250
+ end
251
+ end
252
+
208
253
  end
209
254
 
210
255
  end
@@ -54,7 +54,8 @@ module PEROBS
54
54
  @file_name = File.join(dir, name + '.cache')
55
55
  @page_size = page_size
56
56
  open
57
- @pages = PersistentObjectCache.new(max_in_memory, -1, IDListPage, self)
57
+ @pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
58
+ IDListPage, self)
58
59
  @page_counter = 0
59
60
  end
60
61
 
@@ -102,6 +102,13 @@ module PEROBS
102
102
  end
103
103
  end
104
104
 
105
+ # To allow POXReference objects to be used as Hash keys we need to
106
+ # implement this function. Conveniently, we can just use the PEROBS object
107
+ # ID since that is unique.
108
+ def hash
109
+ @id
110
+ end
111
+
105
112
  # Shortcut to access the _id() method of the referenced object.
106
113
  def _id
107
114
  @id
@@ -54,7 +54,7 @@ module PEROBS
54
54
 
55
55
  # Benchmark runs showed a cache size of 128 to be a good compromise
56
56
  # between read and write performance trade-offs and memory consumption.
57
- @cache = PersistentObjectCache.new(256, -1, SpaceTreeNode, self)
57
+ @cache = PersistentObjectCache.new(256, 256, SpaceTreeNode, self)
58
58
  end
59
59
 
60
60
  # Open the SpaceTree file.
data/lib/perobs/Store.rb CHANGED
@@ -599,6 +599,7 @@ module PEROBS
599
599
  @stats.swept_objects = @db.delete_unmarked_objects do |id|
600
600
  @cache.evict(id)
601
601
  end
602
+ @db.clear_marks
602
603
  GC.start
603
604
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
604
605
  @stats.swept_objects
@@ -1,4 +1,4 @@
1
1
  module PEROBS
2
2
  # The version number
3
- VERSION = "4.2.0"
3
+ VERSION = "4.3.0"
4
4
  end
@@ -265,5 +265,35 @@ describe PEROBS::FlatFileDB do
265
265
  db.close
266
266
  end
267
267
 
268
+ it 'should handle duplicate entries for the same ID in database.blobs file' do
269
+ @store.exit
270
+
271
+ db = PEROBS::FlatFileDB.new(@db_dir)
272
+ db_file = File.join(@db_dir, 'database.blobs')
273
+ db.open
274
+ 0.upto(5) do |i|
275
+ db.put_object("#{i + 1}:#{'X' * (i + 1) * 30}$", i + 1)
276
+ end
277
+ db.close
278
+
279
+ # This appends the entry 2 again
280
+ blob2 = File.read(db_file, 319 - 199, 199)
281
+ File.write(db_file, blob2, File.size(db_file))
282
+
283
+ db.open
284
+ expect(db.check_db).to eql(2)
285
+ expect(db.check_db(true)).to eql(1)
286
+ db.close
287
+ db = PEROBS::FlatFileDB.new(@db_dir, { :log => $stderr,
288
+ :log_level => Logger::WARN })
289
+ db.open
290
+ expect(db.check_db).to eql(0)
291
+
292
+ 0.upto(5) do |i|
293
+ expect(db.get_object(i + 1)).to eql("#{i + 1}:#{'X' * (i + 1) * 30}$")
294
+ end
295
+ db.close
296
+ end
297
+
268
298
  end
269
299
 
@@ -29,13 +29,25 @@ require 'perobs/FuzzyStringMatcher'
29
29
 
30
30
  module PEROBS
31
31
 
32
+ class WordRef < PEROBS::Object
33
+
34
+ attr_persist :word, :line
35
+
36
+ def initialize(store, word, line)
37
+ super(store)
38
+ self.word = word
39
+ self.line = line
40
+ end
41
+
42
+ end
43
+
32
44
  describe FuzzyStringMatcher do
33
45
 
34
46
  before(:all) do
35
47
  @db_name = generate_db_name(__FILE__)
36
48
  @store = PEROBS::Store.new(@db_name)
37
- @fsm = FuzzyStringMatcher.new(@store, 'test')
38
- @fsm2 = FuzzyStringMatcher.new(@store, 'test', true, 2)
49
+ @store['fsm'] = @fsm = @store.new(FuzzyStringMatcher)
50
+ @store['fsm2'] = @fsm2 = @store.new(FuzzyStringMatcher, true, 2)
39
51
  end
40
52
 
41
53
  after(:all) do
@@ -103,6 +115,44 @@ module PEROBS
103
115
  expect(@fsm.best_matches('foobar')).to eql([])
104
116
  end
105
117
 
118
+ it 'should find a match' do
119
+ dut = {
120
+ [ 'one' ] => [ [ 'one', 1.0 ] ],
121
+ [ 'three' ] => [ [ 'three', 1.0 ] ],
122
+ [ 'four' ]=> [ [ 'four', 1.0 ], [ 'fourteen', 0.666 ] ],
123
+ [ 'four', 1.0 ]=> [ [ 'four', 1.0 ] ],
124
+ [ 'even' ] => [ [ 'seven', 0.666 ], [ 'eleven', 0.666 ] ],
125
+ [ 'teen' ] => [ ['thirteen', 0.6666666666666666],
126
+ ['fourteen', 0.6666666666666666],
127
+ ['fifteen', 0.6666666666666666],
128
+ ['sixteen', 0.6666666666666666],
129
+ ['seventeen', 0.6666666666666666],
130
+ ['eighteen', 0.6666666666666666],
131
+ ['nineteen', 0.6666666666666666] ],
132
+ [ 'aight' ] => [ [ 'eight', 0.5 ] ],
133
+ [ 'thirdteen' ] => [ [ 'thirteen', 0.5 ] ],
134
+ [ 'shirt teen', 0.3 ] => [ [ 'thirteen', 0.333 ] ]
135
+ }
136
+ check_data_under_test(@fsm, dut)
137
+ end
138
+
139
+ it 'should sort best to worst matches' do
140
+ @fsm.clear
141
+ %w( xbar xfoox foor bar foobar barfoo foo rab baar fool xbarx
142
+ foobarx xfoobarx foo_bar ).each do |w|
143
+ @fsm.learn(w, w)
144
+ end
145
+ dut = {
146
+ [ 'foo' ] => [["foo", 1.0], ["foor", 0.5], ["foobar", 0.5],
147
+ ["fool", 0.5], ["foobarx", 0.5], ["foo_bar", 0.5],
148
+ ["barfoo", 0.5]],
149
+ [ 'bar' ] => [["bar", 1.0], ["barfoo", 0.5], ["xbar", 0.5],
150
+ ["foobar", 0.5], ["foo_bar", 0.5]],
151
+ [ 'foobar' ] => [["foobar", 1.0], ["foobarx", 0.8], ["xfoobarx", 0.6]]
152
+ }
153
+ check_data_under_test(@fsm, dut)
154
+ end
155
+
106
156
  it 'should handle a larger text' do
107
157
  text =<<-EOT
108
158
  MIT License
@@ -131,9 +181,9 @@ EOT
131
181
  @fsm2.learn(word, word)
132
182
  end
133
183
  stats = @fsm2.stats
134
- expect(stats['dictionary_size']).to eql(363)
184
+ expect(stats['dictionary_size']).to eql(352)
135
185
  expect(stats['max_list_size']).to eql(22)
136
- expect(stats['avg_list_size']).to be_within(0.001).of(2.366)
186
+ expect(stats['avg_list_size']).to be_within(0.001).of(2.409)
137
187
  end
138
188
 
139
189
  it 'should find case sensitive matches' do
@@ -145,6 +195,46 @@ EOT
145
195
  check_data_under_test(@fsm2, dut)
146
196
  end
147
197
 
198
+ it 'should support references to PEROBS objects' do
199
+ text =<<-EOT
200
+ MIT License
201
+
202
+ Permission is hereby granted, free of charge, to any person obtaining
203
+ a copy of this software and associated documentation files (the
204
+ "Software"), to deal in the Software without restriction, including
205
+ without limitation the rights to use, copy, modify, merge, publish,
206
+ distribute, sublicense, and/or sell copies of the Software, and to
207
+ permit persons to whom the Software is furnished to do so, subject to
208
+ the following conditions:
209
+ EOT
210
+
211
+ line_no = 1
212
+ @store['fsm'] = fsm = @store.new(FuzzyStringMatcher)
213
+ @store['refs'] = refs = @store.new(Array)
214
+ text.each_line do |line|
215
+ line.split.each do |word|
216
+ ref = @store.new(WordRef, word, line_no)
217
+ refs << ref
218
+ fsm.learn(word, ref)
219
+ end
220
+ line_no += 1
221
+ end
222
+
223
+ found_lines = []
224
+ fsm.best_matches('SOFTWARE').each do |match|
225
+ found_lines << match[0].line
226
+ end
227
+ expect(found_lines.sort).to eql([ 4, 5, 5, 7, 8 ])
228
+ end
229
+
230
+ it 'should with small search words' do
231
+ @fsm.clear
232
+ mats = 'Yukihiro Matsumoto'
233
+ @fsm.learn(mats)
234
+ expect(@fsm.best_matches('Yukihiro').first.first).to eql(mats)
235
+ expect(@fsm.best_matches('Mats', 0.3).first.first).to eql(mats)
236
+ end
237
+
148
238
  def check_data_under_test(fsm, dut)
149
239
  dut.each do |inputs, reference|
150
240
  key = inputs[0]
data/test/Hash_spec.rb CHANGED
@@ -31,7 +31,6 @@ require 'spec_helper'
31
31
 
32
32
  require 'perobs'
33
33
 
34
-
35
34
  class PO < PEROBS::Object
36
35
 
37
36
  attr_persist :name
@@ -68,9 +67,13 @@ describe PEROBS::Hash do
68
67
  h['po'] = po = @store.new(PO)
69
68
  po.name = 'foobar'
70
69
  h['b'] = 'B'
70
+ @store['po_key'] = po_key = @store.new(PO)
71
+ po_key.name = 'po key'
72
+ h[po_key] = 'PO Key'
71
73
 
72
74
  expect(h['a']).to eq('A')
73
75
  expect(h['b']).to eq('B')
76
+ expect(h[@store['po_key']]).to eq('PO Key')
74
77
  @store.exit
75
78
 
76
79
  @store = PEROBS::Store.new(@db_name)
@@ -78,6 +81,14 @@ describe PEROBS::Hash do
78
81
  expect(h['a']).to eq('A')
79
82
  expect(h['b']).to eq('B')
80
83
  expect(h['po'].name).to eq('foobar')
84
+ po_key = @store['po_key']
85
+ expect(po_key.name).to eq('po key')
86
+ expect(h[po_key]).to eq('PO Key')
87
+ end
88
+
89
+ it 'should not allow hash keys that conflict with internal notations' do
90
+ @store['h'] = h = @store.new(PEROBS::Hash)
91
+ expect { h['#<PEROBS::POReference id=1234>'] = 'foo'; @store.sync }.to raise_error(ArgumentError)
81
92
  end
82
93
 
83
94
  it 'should have an each method to iterate' do
data/test/Store_spec.rb CHANGED
@@ -251,6 +251,20 @@ describe PEROBS::Store do
251
251
  end
252
252
  expect(i).to eq(6)
253
253
 
254
+ capture_io { store.gc }
255
+ capture_io { expect { store.check }.to_not raise_error }
256
+ capture_io { store.exit }
257
+
258
+ store = PEROBS::Store.new(@db_file)
259
+ capture_io { expect { store.check }.to_not raise_error }
260
+
261
+ person = store['person1']
262
+ i = 0
263
+ while (person = person.related) do
264
+ i += 1
265
+ end
266
+ expect(i).to eq(6)
267
+
254
268
  capture_io { store.gc }
255
269
  capture_io { expect { store.check }.to_not raise_error }
256
270
  expect { store.delete_store }.to_not raise_error
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: perobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.0
4
+ version: 4.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Schlaeger
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-02 00:00:00.000000000 Z
11
+ date: 2021-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -147,7 +147,7 @@ homepage: https://github.com/scrapper/perobs
147
147
  licenses:
148
148
  - MIT
149
149
  metadata: {}
150
- post_install_message:
150
+ post_install_message:
151
151
  rdoc_options: []
152
152
  require_paths:
153
153
  - lib
@@ -162,9 +162,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
162
  - !ruby/object:Gem::Version
163
163
  version: '0'
164
164
  requirements: []
165
- rubyforge_project:
166
- rubygems_version: 2.7.6.2
167
- signing_key:
165
+ rubygems_version: 3.2.3
166
+ signing_key:
168
167
  specification_version: 4
169
168
  summary: Persistent Ruby Object Store
170
169
  test_files: