perobs 4.2.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/perobs/Store.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Store.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016, 2017, 2018, 2019
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022
6
6
  # by Chris Schlaeger <chris@taskjuggler.org>
7
7
  #
8
8
  # MIT License
@@ -27,6 +27,7 @@
27
27
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
28
 
29
29
  require 'set'
30
+ require 'monitor'
30
31
 
31
32
  require 'perobs/Log'
32
33
  require 'perobs/Handle'
@@ -46,7 +47,7 @@ require 'perobs/ConsoleProgressMeter'
46
47
  # PErsistent Ruby OBject Store
47
48
  module PEROBS
48
49
 
49
- Statistics = Struct.new(:in_memory_objects, :root_objects, :zombie_objects,
50
+ Statistics = Struct.new(:in_memory_objects, :root_objects,
50
51
  :marked_objects, :swept_objects,
51
52
  :created_objects, :collected_objects)
52
53
 
@@ -160,9 +161,6 @@ module PEROBS
160
161
  # List of PEROBS objects that are currently available as Ruby objects
161
162
  # hashed by their ID.
162
163
  @in_memory_objects = {}
163
- # List of objects that were destroyed already but were still found in
164
- # the in_memory_objects list. _collect has not yet been called for them.
165
- @zombie_objects = {}
166
164
 
167
165
  # This object keeps some counters of interest.
168
166
  @stats = Statistics.new
@@ -173,6 +171,9 @@ module PEROBS
173
171
  # objects in memory.
174
172
  @cache = Cache.new(options[:cache_bits] || 16)
175
173
 
174
+ # Lock to serialize access to the Store and all stored data.
175
+ @lock = Monitor.new
176
+
176
177
  # The named (global) objects IDs hashed by their name
177
178
  unless options[:no_root_objects]
178
179
  unless (@root_objects = object_by_id(0))
@@ -243,8 +244,8 @@ module PEROBS
243
244
  end
244
245
  end
245
246
 
246
- @db = @class_map = @in_memory_objects = @zombie_objects =
247
- @stats = @cache = @root_objects = nil
247
+ @db = @class_map = @in_memory_objects = @stats = @cache =
248
+ @root_objects = nil
248
249
  end
249
250
 
250
251
  # You need to call this method to create new PEROBS objects that belong to
@@ -259,11 +260,13 @@ module PEROBS
259
260
  PEROBS.log.fatal "#{klass} is not a BasicObject derivative"
260
261
  end
261
262
 
262
- obj = _construct_po(klass, _new_id, *args)
263
- # Mark the new object as modified so it gets pushed into the database.
264
- @cache.cache_write(obj)
265
- # Return a POXReference proxy for the newly created object.
266
- obj.myself
263
+ @lock.synchronize do
264
+ obj = _construct_po(klass, _new_id, *args)
265
+ # Mark the new object as modified so it gets pushed into the database.
266
+ @cache.cache_write(obj)
267
+ # Return a POXReference proxy for the newly created object.
268
+ obj.myself
269
+ end
267
270
  end
268
271
 
269
272
  # For library internal use only!
@@ -280,9 +283,11 @@ module PEROBS
280
283
  # method was called. This is an alternative to exit() that additionally
281
284
  # deletes the entire database.
282
285
  def delete_store
283
- @db.delete_database
284
- @db = @class_map = @in_memory_objects = @zombie_objects =
285
- @stats = @cache = @root_objects = nil
286
+ @lock.synchronize do
287
+ @db.delete_database
288
+ @db = @class_map = @in_memory_objects = @stats = @cache =
289
+ @root_objects = nil
290
+ end
286
291
  end
287
292
 
288
293
  # Store the provided object under the given name. Use this to make the
@@ -294,25 +299,27 @@ module PEROBS
294
299
  # @param obj [PEROBS::Object] The object to store
295
300
  # @return [PEROBS::Object] The stored object.
296
301
  def []=(name, obj)
297
- # If the passed object is nil, we delete the entry if it exists.
298
- if obj.nil?
299
- @root_objects.delete(name)
300
- return nil
301
- end
302
+ @lock.synchronize do
303
+ # If the passed object is nil, we delete the entry if it exists.
304
+ if obj.nil?
305
+ @root_objects.delete(name)
306
+ return nil
307
+ end
302
308
 
303
- # We only allow derivatives of PEROBS::Object to be stored in the
304
- # store.
305
- unless obj.is_a?(ObjectBase)
306
- PEROBS.log.fatal 'Object must be of class PEROBS::Object but ' +
307
- "is of class #{obj.class}"
308
- end
309
+ # We only allow derivatives of PEROBS::Object to be stored in the
310
+ # store.
311
+ unless obj.is_a?(ObjectBase)
312
+ PEROBS.log.fatal 'Object must be of class PEROBS::Object but ' +
313
+ "is of class #{obj.class}"
314
+ end
309
315
 
310
- unless obj.store == self
311
- PEROBS.log.fatal 'The object does not belong to this store.'
312
- end
316
+ unless obj.store == self
317
+ PEROBS.log.fatal 'The object does not belong to this store.'
318
+ end
313
319
 
314
- # Store the name and mark the name list as modified.
315
- @root_objects[name] = obj._id
320
+ # Store the name and mark the name list as modified.
321
+ @root_objects[name] = obj._id
322
+ end
316
323
 
317
324
  obj
318
325
  end
@@ -322,28 +329,34 @@ module PEROBS
322
329
  # returned.
323
330
  # @return The requested object or nil if it doesn't exist.
324
331
  def [](name)
325
- # Return nil if there is no object with that name.
326
- return nil unless (id = @root_objects[name])
332
+ @lock.synchronize do
333
+ # Return nil if there is no object with that name.
334
+ return nil unless (id = @root_objects[name])
327
335
 
328
- POXReference.new(self, id)
336
+ POXReference.new(self, id)
337
+ end
329
338
  end
330
339
 
331
340
  # Return a list with all the names of the root objects.
332
341
  # @return [Array of Symbols]
333
342
  def names
334
- @root_objects.keys
343
+ @lock.synchronize do
344
+ @root_objects.keys
345
+ end
335
346
  end
336
347
 
337
348
  # Flush out all modified objects to disk and shrink the in-memory list if
338
349
  # needed.
339
350
  def sync
340
- if @cache.in_transaction?
341
- @cache.abort_transaction
351
+ @lock.synchronize do
352
+ if @cache.in_transaction?
353
+ @cache.abort_transaction
354
+ @cache.flush
355
+ PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
356
+ Kernel.caller.join("\n")
357
+ end
342
358
  @cache.flush
343
- PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
344
- Kernel.caller.join("\n")
345
359
  end
346
- @cache.flush
347
360
  end
348
361
 
349
362
  # Return the number of object stored in the store. CAVEAT: This method
@@ -353,7 +366,9 @@ module PEROBS
353
366
  def size
354
367
  # We don't include the Hash that stores the root objects into the object
355
368
  # count.
356
- @db.item_counter - 1
369
+ @lock.synchronize do
370
+ @db.item_counter - 1
371
+ end
357
372
  end
358
373
 
359
374
  # Discard all objects that are not somehow connected to the root objects
@@ -362,51 +377,20 @@ module PEROBS
362
377
  # method periodically.
363
378
  # @return [Integer] The number of collected objects
364
379
  def gc
365
- sync
366
- mark
367
- sweep
380
+ @lock.synchronize do
381
+ sync
382
+ mark
383
+ sweep
384
+ end
368
385
  end
369
386
 
370
387
  # Return the object with the provided ID. This method is not part of the
371
388
  # public API and should never be called by outside users. It's purely
372
389
  # intended for internal use.
373
390
  def object_by_id(id)
374
- if (ruby_object_id = @in_memory_objects[id])
375
- # We have the object in memory so we can just return it.
376
- begin
377
- object = ObjectSpace._id2ref(ruby_object_id)
378
- # Let's make sure the object is really the object we are looking
379
- # for. The GC might have recycled it already and the Ruby object ID
380
- # could now be used for another object.
381
- if object.is_a?(ObjectBase) && object._id == id
382
- return object
383
- end
384
- rescue RangeError => e
385
- # Due to a race condition the object can still be in the
386
- # @in_memory_objects list but has been collected already by the Ruby
387
- # GC. In that case we need to load it again. The _collect() call
388
- # will happen much later, potentially after we have registered a new
389
- # object with the same ID.
390
- @zombie_objects[id] = @in_memory_objects.delete(id)
391
- end
391
+ @lock.synchronize do
392
+ object_by_id_internal(id)
392
393
  end
393
-
394
- if (obj = @cache.object_by_id(id))
395
- PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in cache but not in_memory"
396
- end
397
-
398
- # We don't have the object in memory. Let's find it in the storage.
399
- if @db.include?(id)
400
- # Great, object found. Read it into memory and return it.
401
- obj = ObjectBase::read(self, id)
402
- # Add the object to the in-memory storage list.
403
- @cache.cache_read(obj)
404
-
405
- return obj
406
- end
407
-
408
- # The requested object does not exist. Return nil.
409
- nil
410
394
  end
411
395
 
412
396
  # This method can be used to check the database and optionally repair it.
@@ -471,38 +455,54 @@ module PEROBS
471
455
  # beginning of the transaction. The exception is passed on to the
472
456
  # enclosing scope, so you probably want to handle it accordingly.
473
457
  def transaction
474
- @cache.begin_transaction
458
+ transaction_not_started = true
459
+ while transaction_not_started do
460
+ begin
461
+ @lock.synchronize do
462
+ @cache.begin_transaction
463
+ # If we get to this point, the transaction was successfully
464
+ # started. We can exit the loop.
465
+ transaction_not_started = false
466
+ end
467
+ rescue TransactionInOtherThread
468
+ # sleep up to 50ms
469
+ sleep(rand(50) / 1000.0)
470
+ end
471
+ end
472
+
475
473
  begin
476
474
  yield if block_given?
477
475
  rescue => e
478
- @cache.abort_transaction
476
+ @lock.synchronize { @cache.abort_transaction }
479
477
  raise e
480
478
  end
481
- @cache.end_transaction
479
+ @lock.synchronize { @cache.end_transaction }
482
480
  end
483
481
 
484
482
  # Calls the given block once for each object, passing that object as a
485
483
  # parameter.
486
484
  def each
487
- @db.clear_marks
488
- # Start with the object 0 and the indexes of the root objects. Push them
489
- # onto the work stack.
490
- stack = [ 0 ] + @root_objects.values
491
- while !stack.empty?
492
- # Get an object index from the stack.
493
- id = stack.pop
494
- next if @db.is_marked?(id)
495
-
496
- unless (obj = object_by_id(id))
497
- PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
498
- "not found."
499
- end
500
- # Mark the object so it will never be pushed to the stack again.
501
- @db.mark(id)
502
- yield(obj.myself) if block_given?
503
- # Push the IDs of all unmarked referenced objects onto the stack
504
- obj._referenced_object_ids.each do |r_id|
505
- stack << r_id unless @db.is_marked?(r_id)
485
+ @lock.synchronize do
486
+ @db.clear_marks
487
+ # Start with the object 0 and the indexes of the root objects. Push them
488
+ # onto the work stack.
489
+ stack = [ 0 ] + @root_objects.values
490
+ while !stack.empty?
491
+ # Get an object index from the stack.
492
+ id = stack.pop
493
+ next if @db.is_marked?(id)
494
+
495
+ unless (obj = object_by_id_internal(id))
496
+ PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
497
+ "not found."
498
+ end
499
+ # Mark the object so it will never be pushed to the stack again.
500
+ @db.mark(id)
501
+ yield(obj.myself) if block_given?
502
+ # Push the IDs of all unmarked referenced objects onto the stack
503
+ obj._referenced_object_ids.each do |r_id|
504
+ stack << r_id unless @db.is_marked?(r_id)
505
+ end
506
506
  end
507
507
  end
508
508
  end
@@ -510,7 +510,7 @@ module PEROBS
510
510
  # Rename classes of objects stored in the data base.
511
511
  # @param rename_map [Hash] Hash that maps the old name to the new name
512
512
  def rename_classes(rename_map)
513
- @class_map.rename(rename_map)
513
+ @lock.synchronize { @class_map.rename(rename_map) }
514
514
  end
515
515
 
516
516
  # Internal method. Don't use this outside of this library!
@@ -518,14 +518,16 @@ module PEROBS
518
518
  # random numbers between 0 and 2**64 - 1.
519
519
  # @return [Integer]
520
520
  def _new_id
521
- begin
522
- # Generate a random number. It's recommended to not store more than
523
- # 2**62 objects in the same store.
524
- id = rand(2**64)
525
- # Ensure that we don't have already another object with this ID.
526
- end while @in_memory_objects.include?(id) || @db.include?(id)
521
+ @lock.synchronize do
522
+ begin
523
+ # Generate a random number. It's recommended to not store more than
524
+ # 2**62 objects in the same store.
525
+ id = rand(2**64)
526
+ # Ensure that we don't have already another object with this ID.
527
+ end while @in_memory_objects.include?(id) || @db.include?(id)
527
528
 
528
- id
529
+ id
530
+ end
529
531
  end
530
532
 
531
533
  # Internal method. Don't use this outside of this library!
@@ -536,16 +538,18 @@ module PEROBS
536
538
  # @param obj [BasicObject] Object to register
537
539
  # @param id [Integer] object ID
538
540
  def _register_in_memory(obj, id)
539
- unless obj.is_a?(ObjectBase)
540
- PEROBS.log.fatal "You can only register ObjectBase objects"
541
- end
542
- if @in_memory_objects.include?(id)
543
- PEROBS.log.fatal "The Store::_in_memory_objects list already " +
544
- "contains an object for ID #{id}"
545
- end
541
+ @lock.synchronize do
542
+ unless obj.is_a?(ObjectBase)
543
+ PEROBS.log.fatal "You can only register ObjectBase objects"
544
+ end
545
+ if @in_memory_objects.include?(id)
546
+ PEROBS.log.fatal "The Store::_in_memory_objects list already " +
547
+ "contains an object for ID #{id}"
548
+ end
546
549
 
547
- @in_memory_objects[id] = obj.object_id
548
- @stats[:created_objects] += 1
550
+ @in_memory_objects[id] = obj.object_id
551
+ @stats[:created_objects] += 1
552
+ end
549
553
  end
550
554
 
551
555
  # Remove the object from the in-memory list. This is an internal method
@@ -553,26 +557,73 @@ module PEROBS
553
557
  # finalizer, so many restrictions apply!
554
558
  # @param id [Integer] Object ID of object to remove from the list
555
559
  def _collect(id, ruby_object_id)
556
- if @in_memory_objects[id] == ruby_object_id
560
+ # This method should only be called from the Ruby garbage collector.
561
+ # Therefore no locking is needed or even possible. The GC can kick in at
562
+ # any time and we could be anywhere in the code. So there is a small
563
+ # risk for a race here, but it should not have any serious consequences.
564
+ if @in_memory_objects && @in_memory_objects[id] == ruby_object_id
557
565
  @in_memory_objects.delete(id)
558
566
  @stats[:collected_objects] += 1
559
- elsif @zombie_objects[id] == ruby_object_id
560
- @zombie_objects.delete(id)
561
- @stats[:collected_objects] += 1
562
567
  end
563
568
  end
564
569
 
565
570
  # This method returns a Hash with some statistics about this store.
566
571
  def statistics
567
- @stats.in_memory_objects = @in_memory_objects.length
568
- @stats.root_objects = @root_objects.length
569
- @stats.zombie_objects = @zombie_objects.length
572
+ @lock.synchronize do
573
+ @stats.in_memory_objects = @in_memory_objects.length
574
+ @stats.root_objects = @root_objects.length
575
+ end
570
576
 
571
577
  @stats
572
578
  end
573
579
 
574
580
  private
575
581
 
582
+ def object_by_id_internal(id)
583
+ if (ruby_object_id = @in_memory_objects[id])
584
+ # We have the object in memory so we can just return it.
585
+ begin
586
+ object = ObjectSpace._id2ref(ruby_object_id)
587
+ # Let's make sure the object is really the object we are looking
588
+ # for. The GC might have recycled it already and the Ruby object ID
589
+ # could now be used for another object.
590
+ if object.is_a?(ObjectBase) && object._id == id
591
+ return object
592
+ end
593
+ rescue RangeError => e
594
+ # Due to a race condition the object can still be in the
595
+ # @in_memory_objects list but has been collected already by the Ruby
596
+ # GC. The _collect() call has not been completed yet. We now have to
597
+ # wait until this has been done. I think the GC lock will prevent a
598
+ # race on @in_memory_objects.
599
+ GC.start
600
+ while @in_memory_objects.include?(id)
601
+ sleep 0.01
602
+ end
603
+ end
604
+ end
605
+
606
+ # This is just a safety check. It has never triggered, so we can disable
607
+ # it for now.
608
+ #if (obj = @cache.object_by_id(id))
609
+ # PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in " +
610
+ # "cache but not in_memory"
611
+ #end
612
+
613
+ # We don't have the object in memory. Let's find it in the storage.
614
+ if @db.include?(id)
615
+ # Great, object found. Read it into memory and return it.
616
+ obj = ObjectBase::read(self, id)
617
+ # Add the object to the in-memory storage list.
618
+ @cache.cache_read(obj)
619
+
620
+ return obj
621
+ end
622
+
623
+ # The requested object does not exist. Return nil.
624
+ nil
625
+ end
626
+
576
627
  # Mark phase of a mark-and-sweep garbage collector. It will mark all
577
628
  # objects that are reachable from the root objects.
578
629
  def mark
@@ -599,6 +650,7 @@ module PEROBS
599
650
  @stats.swept_objects = @db.delete_unmarked_objects do |id|
600
651
  @cache.evict(id)
601
652
  end
653
+ @db.clear_marks
602
654
  GC.start
603
655
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
604
656
  @stats.swept_objects
@@ -1,4 +1,4 @@
1
1
  module PEROBS
2
2
  # The version number
3
- VERSION = "4.2.0"
3
+ VERSION = "4.5.0"
4
4
  end
data/lib/perobs.rb CHANGED
@@ -28,3 +28,4 @@
28
28
  require "perobs/version"
29
29
  require 'perobs/Store'
30
30
  require 'perobs/ConsoleProgressMeter'
31
+ require 'perobs/FuzzyStringMatcher'
data/perobs.gemspec CHANGED
@@ -20,5 +20,5 @@ GEM_SPEC = Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency 'bundler', '~> 2.3'
22
22
  spec.add_development_dependency 'yard', '~>0.9.12'
23
- spec.add_development_dependency 'rake', '~> 12.3.3'
23
+ spec.add_development_dependency 'rake', '~> 13.0.3'
24
24
  end
@@ -265,5 +265,35 @@ describe PEROBS::FlatFileDB do
265
265
  db.close
266
266
  end
267
267
 
268
+ it 'should handle duplicate entries for the same ID in database.blobs file' do
269
+ @store.exit
270
+
271
+ db = PEROBS::FlatFileDB.new(@db_dir)
272
+ db_file = File.join(@db_dir, 'database.blobs')
273
+ db.open
274
+ 0.upto(5) do |i|
275
+ db.put_object("#{i + 1}:#{'X' * (i + 1) * 30}$", i + 1)
276
+ end
277
+ db.close
278
+
279
+ # This appends the entry 2 again
280
+ blob2 = File.read(db_file, 319 - 199, 199)
281
+ File.write(db_file, blob2, File.size(db_file))
282
+
283
+ db.open
284
+ expect(db.check_db).to eql(2)
285
+ expect(db.check_db(true)).to eql(1)
286
+ db.close
287
+ db = PEROBS::FlatFileDB.new(@db_dir, { :log => $stderr,
288
+ :log_level => Logger::WARN })
289
+ db.open
290
+ expect(db.check_db).to eql(0)
291
+
292
+ 0.upto(5) do |i|
293
+ expect(db.get_object(i + 1)).to eql("#{i + 1}:#{'X' * (i + 1) * 30}$")
294
+ end
295
+ db.close
296
+ end
297
+
268
298
  end
269
299
 
@@ -29,13 +29,25 @@ require 'perobs/FuzzyStringMatcher'
29
29
 
30
30
  module PEROBS
31
31
 
32
+ class WordRef < PEROBS::Object
33
+
34
+ attr_persist :word, :line
35
+
36
+ def initialize(store, word, line)
37
+ super(store)
38
+ self.word = word
39
+ self.line = line
40
+ end
41
+
42
+ end
43
+
32
44
  describe FuzzyStringMatcher do
33
45
 
34
46
  before(:all) do
35
47
  @db_name = generate_db_name(__FILE__)
36
48
  @store = PEROBS::Store.new(@db_name)
37
- @fsm = FuzzyStringMatcher.new(@store, 'test')
38
- @fsm2 = FuzzyStringMatcher.new(@store, 'test', true, 2)
49
+ @store['fsm'] = @fsm = @store.new(FuzzyStringMatcher)
50
+ @store['fsm2'] = @fsm2 = @store.new(FuzzyStringMatcher, true, 2)
39
51
  end
40
52
 
41
53
  after(:all) do
@@ -103,6 +115,44 @@ module PEROBS
103
115
  expect(@fsm.best_matches('foobar')).to eql([])
104
116
  end
105
117
 
118
+ it 'should find a match' do
119
+ dut = {
120
+ [ 'one' ] => [ [ 'one', 1.0 ] ],
121
+ [ 'three' ] => [ [ 'three', 1.0 ] ],
122
+ [ 'four' ]=> [ [ 'four', 1.0 ], [ 'fourteen', 0.666 ] ],
123
+ [ 'four', 1.0 ]=> [ [ 'four', 1.0 ] ],
124
+ [ 'even' ] => [ [ 'seven', 0.666 ], [ 'eleven', 0.666 ] ],
125
+ [ 'teen' ] => [ ['thirteen', 0.6666666666666666],
126
+ ['fourteen', 0.6666666666666666],
127
+ ['fifteen', 0.6666666666666666],
128
+ ['sixteen', 0.6666666666666666],
129
+ ['seventeen', 0.6666666666666666],
130
+ ['eighteen', 0.6666666666666666],
131
+ ['nineteen', 0.6666666666666666] ],
132
+ [ 'aight' ] => [ [ 'eight', 0.5 ] ],
133
+ [ 'thirdteen' ] => [ [ 'thirteen', 0.5 ] ],
134
+ [ 'shirt teen', 0.3 ] => [ [ 'thirteen', 0.333 ] ]
135
+ }
136
+ check_data_under_test(@fsm, dut)
137
+ end
138
+
139
+ it 'should sort best to worst matches' do
140
+ @fsm.clear
141
+ %w( xbar xfoox foor bar foobar barfoo foo rab baar fool xbarx
142
+ foobarx xfoobarx foo_bar ).each do |w|
143
+ @fsm.learn(w, w)
144
+ end
145
+ dut = {
146
+ [ 'foo' ] => [["foo", 1.0], ["foor", 0.5], ["foobar", 0.5],
147
+ ["fool", 0.5], ["foobarx", 0.5], ["foo_bar", 0.5],
148
+ ["barfoo", 0.5]],
149
+ [ 'bar' ] => [["bar", 1.0], ["barfoo", 0.5], ["xbar", 0.5],
150
+ ["foobar", 0.5], ["foo_bar", 0.5]],
151
+ [ 'foobar' ] => [["foobar", 1.0], ["foobarx", 0.8], ["xfoobarx", 0.6]]
152
+ }
153
+ check_data_under_test(@fsm, dut)
154
+ end
155
+
106
156
  it 'should handle a larger text' do
107
157
  text =<<-EOT
108
158
  MIT License
@@ -131,9 +181,9 @@ EOT
131
181
  @fsm2.learn(word, word)
132
182
  end
133
183
  stats = @fsm2.stats
134
- expect(stats['dictionary_size']).to eql(363)
184
+ expect(stats['dictionary_size']).to eql(352)
135
185
  expect(stats['max_list_size']).to eql(22)
136
- expect(stats['avg_list_size']).to be_within(0.001).of(2.366)
186
+ expect(stats['avg_list_size']).to be_within(0.001).of(2.409)
137
187
  end
138
188
 
139
189
  it 'should find case sensitive matches' do
@@ -145,6 +195,46 @@ EOT
145
195
  check_data_under_test(@fsm2, dut)
146
196
  end
147
197
 
198
+ it 'should support references to PEROBS objects' do
199
+ text =<<-EOT
200
+ MIT License
201
+
202
+ Permission is hereby granted, free of charge, to any person obtaining
203
+ a copy of this software and associated documentation files (the
204
+ "Software"), to deal in the Software without restriction, including
205
+ without limitation the rights to use, copy, modify, merge, publish,
206
+ distribute, sublicense, and/or sell copies of the Software, and to
207
+ permit persons to whom the Software is furnished to do so, subject to
208
+ the following conditions:
209
+ EOT
210
+
211
+ line_no = 1
212
+ @store['fsm'] = fsm = @store.new(FuzzyStringMatcher)
213
+ @store['refs'] = refs = @store.new(Array)
214
+ text.each_line do |line|
215
+ line.split.each do |word|
216
+ ref = @store.new(WordRef, word, line_no)
217
+ refs << ref
218
+ fsm.learn(word, ref)
219
+ end
220
+ line_no += 1
221
+ end
222
+
223
+ found_lines = []
224
+ fsm.best_matches('SOFTWARE').each do |match|
225
+ found_lines << match[0].line
226
+ end
227
+ expect(found_lines.sort).to eql([ 4, 5, 5, 7, 8 ])
228
+ end
229
+
230
+ it 'should with small search words' do
231
+ @fsm.clear
232
+ mats = 'Yukihiro Matsumoto'
233
+ @fsm.learn(mats)
234
+ expect(@fsm.best_matches('Yukihiro').first.first).to eql(mats)
235
+ expect(@fsm.best_matches('Mats', 0.3).first.first).to eql(mats)
236
+ end
237
+
148
238
  def check_data_under_test(fsm, dut)
149
239
  dut.each do |inputs, reference|
150
240
  key = inputs[0]