perobs 4.2.0 → 4.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/perobs/Store.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Store.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016, 2017, 2018, 2019
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022
6
6
  # by Chris Schlaeger <chris@taskjuggler.org>
7
7
  #
8
8
  # MIT License
@@ -27,6 +27,7 @@
27
27
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
28
 
29
29
  require 'set'
30
+ require 'monitor'
30
31
 
31
32
  require 'perobs/Log'
32
33
  require 'perobs/Handle'
@@ -46,7 +47,7 @@ require 'perobs/ConsoleProgressMeter'
46
47
  # PErsistent Ruby OBject Store
47
48
  module PEROBS
48
49
 
49
- Statistics = Struct.new(:in_memory_objects, :root_objects, :zombie_objects,
50
+ Statistics = Struct.new(:in_memory_objects, :root_objects,
50
51
  :marked_objects, :swept_objects,
51
52
  :created_objects, :collected_objects)
52
53
 
@@ -160,9 +161,6 @@ module PEROBS
160
161
  # List of PEROBS objects that are currently available as Ruby objects
161
162
  # hashed by their ID.
162
163
  @in_memory_objects = {}
163
- # List of objects that were destroyed already but were still found in
164
- # the in_memory_objects list. _collect has not yet been called for them.
165
- @zombie_objects = {}
166
164
 
167
165
  # This object keeps some counters of interest.
168
166
  @stats = Statistics.new
@@ -173,6 +171,9 @@ module PEROBS
173
171
  # objects in memory.
174
172
  @cache = Cache.new(options[:cache_bits] || 16)
175
173
 
174
+ # Lock to serialize access to the Store and all stored data.
175
+ @lock = Monitor.new
176
+
176
177
  # The named (global) objects IDs hashed by their name
177
178
  unless options[:no_root_objects]
178
179
  unless (@root_objects = object_by_id(0))
@@ -243,8 +244,8 @@ module PEROBS
243
244
  end
244
245
  end
245
246
 
246
- @db = @class_map = @in_memory_objects = @zombie_objects =
247
- @stats = @cache = @root_objects = nil
247
+ @db = @class_map = @in_memory_objects = @stats = @cache =
248
+ @root_objects = nil
248
249
  end
249
250
 
250
251
  # You need to call this method to create new PEROBS objects that belong to
@@ -259,11 +260,13 @@ module PEROBS
259
260
  PEROBS.log.fatal "#{klass} is not a BasicObject derivative"
260
261
  end
261
262
 
262
- obj = _construct_po(klass, _new_id, *args)
263
- # Mark the new object as modified so it gets pushed into the database.
264
- @cache.cache_write(obj)
265
- # Return a POXReference proxy for the newly created object.
266
- obj.myself
263
+ @lock.synchronize do
264
+ obj = _construct_po(klass, _new_id, *args)
265
+ # Mark the new object as modified so it gets pushed into the database.
266
+ @cache.cache_write(obj)
267
+ # Return a POXReference proxy for the newly created object.
268
+ obj.myself
269
+ end
267
270
  end
268
271
 
269
272
  # For library internal use only!
@@ -280,9 +283,11 @@ module PEROBS
280
283
  # method was called. This is an alternative to exit() that additionally
281
284
  # deletes the entire database.
282
285
  def delete_store
283
- @db.delete_database
284
- @db = @class_map = @in_memory_objects = @zombie_objects =
285
- @stats = @cache = @root_objects = nil
286
+ @lock.synchronize do
287
+ @db.delete_database
288
+ @db = @class_map = @in_memory_objects = @stats = @cache =
289
+ @root_objects = nil
290
+ end
286
291
  end
287
292
 
288
293
  # Store the provided object under the given name. Use this to make the
@@ -294,25 +299,27 @@ module PEROBS
294
299
  # @param obj [PEROBS::Object] The object to store
295
300
  # @return [PEROBS::Object] The stored object.
296
301
  def []=(name, obj)
297
- # If the passed object is nil, we delete the entry if it exists.
298
- if obj.nil?
299
- @root_objects.delete(name)
300
- return nil
301
- end
302
+ @lock.synchronize do
303
+ # If the passed object is nil, we delete the entry if it exists.
304
+ if obj.nil?
305
+ @root_objects.delete(name)
306
+ return nil
307
+ end
302
308
 
303
- # We only allow derivatives of PEROBS::Object to be stored in the
304
- # store.
305
- unless obj.is_a?(ObjectBase)
306
- PEROBS.log.fatal 'Object must be of class PEROBS::Object but ' +
307
- "is of class #{obj.class}"
308
- end
309
+ # We only allow derivatives of PEROBS::Object to be stored in the
310
+ # store.
311
+ unless obj.is_a?(ObjectBase)
312
+ PEROBS.log.fatal 'Object must be of class PEROBS::Object but ' +
313
+ "is of class #{obj.class}"
314
+ end
309
315
 
310
- unless obj.store == self
311
- PEROBS.log.fatal 'The object does not belong to this store.'
312
- end
316
+ unless obj.store == self
317
+ PEROBS.log.fatal 'The object does not belong to this store.'
318
+ end
313
319
 
314
- # Store the name and mark the name list as modified.
315
- @root_objects[name] = obj._id
320
+ # Store the name and mark the name list as modified.
321
+ @root_objects[name] = obj._id
322
+ end
316
323
 
317
324
  obj
318
325
  end
@@ -322,28 +329,34 @@ module PEROBS
322
329
  # returned.
323
330
  # @return The requested object or nil if it doesn't exist.
324
331
  def [](name)
325
- # Return nil if there is no object with that name.
326
- return nil unless (id = @root_objects[name])
332
+ @lock.synchronize do
333
+ # Return nil if there is no object with that name.
334
+ return nil unless (id = @root_objects[name])
327
335
 
328
- POXReference.new(self, id)
336
+ POXReference.new(self, id)
337
+ end
329
338
  end
330
339
 
331
340
  # Return a list with all the names of the root objects.
332
341
  # @return [Array of Symbols]
333
342
  def names
334
- @root_objects.keys
343
+ @lock.synchronize do
344
+ @root_objects.keys
345
+ end
335
346
  end
336
347
 
337
348
  # Flush out all modified objects to disk and shrink the in-memory list if
338
349
  # needed.
339
350
  def sync
340
- if @cache.in_transaction?
341
- @cache.abort_transaction
351
+ @lock.synchronize do
352
+ if @cache.in_transaction?
353
+ @cache.abort_transaction
354
+ @cache.flush
355
+ PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
356
+ Kernel.caller.join("\n")
357
+ end
342
358
  @cache.flush
343
- PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
344
- Kernel.caller.join("\n")
345
359
  end
346
- @cache.flush
347
360
  end
348
361
 
349
362
  # Return the number of objects stored in the store. CAVEAT: This method
@@ -353,7 +366,9 @@ module PEROBS
353
366
  def size
354
367
  # We don't include the Hash that stores the root objects into the object
355
368
  # count.
356
- @db.item_counter - 1
369
+ @lock.synchronize do
370
+ @db.item_counter - 1
371
+ end
357
372
  end
358
373
 
359
374
  # Discard all objects that are not somehow connected to the root objects
@@ -362,51 +377,20 @@ module PEROBS
362
377
  # method periodically.
363
378
  # @return [Integer] The number of collected objects
364
379
  def gc
365
- sync
366
- mark
367
- sweep
380
+ @lock.synchronize do
381
+ sync
382
+ mark
383
+ sweep
384
+ end
368
385
  end
369
386
 
370
387
  # Return the object with the provided ID. This method is not part of the
371
388
  # public API and should never be called by outside users. It's purely
372
389
  # intended for internal use.
373
390
  def object_by_id(id)
374
- if (ruby_object_id = @in_memory_objects[id])
375
- # We have the object in memory so we can just return it.
376
- begin
377
- object = ObjectSpace._id2ref(ruby_object_id)
378
- # Let's make sure the object is really the object we are looking
379
- # for. The GC might have recycled it already and the Ruby object ID
380
- # could now be used for another object.
381
- if object.is_a?(ObjectBase) && object._id == id
382
- return object
383
- end
384
- rescue RangeError => e
385
- # Due to a race condition the object can still be in the
386
- # @in_memory_objects list but has been collected already by the Ruby
387
- # GC. In that case we need to load it again. The _collect() call
388
- # will happen much later, potentially after we have registered a new
389
- # object with the same ID.
390
- @zombie_objects[id] = @in_memory_objects.delete(id)
391
- end
391
+ @lock.synchronize do
392
+ object_by_id_internal(id)
392
393
  end
393
-
394
- if (obj = @cache.object_by_id(id))
395
- PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in cache but not in_memory"
396
- end
397
-
398
- # We don't have the object in memory. Let's find it in the storage.
399
- if @db.include?(id)
400
- # Great, object found. Read it into memory and return it.
401
- obj = ObjectBase::read(self, id)
402
- # Add the object to the in-memory storage list.
403
- @cache.cache_read(obj)
404
-
405
- return obj
406
- end
407
-
408
- # The requested object does not exist. Return nil.
409
- nil
410
394
  end
411
395
 
412
396
  # This method can be used to check the database and optionally repair it.
@@ -471,38 +455,54 @@ module PEROBS
471
455
  # beginning of the transaction. The exception is passed on to the
472
456
  # enclosing scope, so you probably want to handle it accordingly.
473
457
  def transaction
474
- @cache.begin_transaction
458
+ transaction_not_started = true
459
+ while transaction_not_started do
460
+ begin
461
+ @lock.synchronize do
462
+ @cache.begin_transaction
463
+ # If we get to this point, the transaction was successfully
464
+ # started. We can exit the loop.
465
+ transaction_not_started = false
466
+ end
467
+ rescue TransactionInOtherThread
468
+ # sleep up to 50ms
469
+ sleep(rand(50) / 1000.0)
470
+ end
471
+ end
472
+
475
473
  begin
476
474
  yield if block_given?
477
475
  rescue => e
478
- @cache.abort_transaction
476
+ @lock.synchronize { @cache.abort_transaction }
479
477
  raise e
480
478
  end
481
- @cache.end_transaction
479
+ @lock.synchronize { @cache.end_transaction }
482
480
  end
483
481
 
484
482
  # Calls the given block once for each object, passing that object as a
485
483
  # parameter.
486
484
  def each
487
- @db.clear_marks
488
- # Start with the object 0 and the indexes of the root objects. Push them
489
- # onto the work stack.
490
- stack = [ 0 ] + @root_objects.values
491
- while !stack.empty?
492
- # Get an object index from the stack.
493
- id = stack.pop
494
- next if @db.is_marked?(id)
495
-
496
- unless (obj = object_by_id(id))
497
- PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
498
- "not found."
499
- end
500
- # Mark the object so it will never be pushed to the stack again.
501
- @db.mark(id)
502
- yield(obj.myself) if block_given?
503
- # Push the IDs of all unmarked referenced objects onto the stack
504
- obj._referenced_object_ids.each do |r_id|
505
- stack << r_id unless @db.is_marked?(r_id)
485
+ @lock.synchronize do
486
+ @db.clear_marks
487
+ # Start with the object 0 and the indexes of the root objects. Push them
488
+ # onto the work stack.
489
+ stack = [ 0 ] + @root_objects.values
490
+ while !stack.empty?
491
+ # Get an object index from the stack.
492
+ id = stack.pop
493
+ next if @db.is_marked?(id)
494
+
495
+ unless (obj = object_by_id_internal(id))
496
+ PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
497
+ "not found."
498
+ end
499
+ # Mark the object so it will never be pushed to the stack again.
500
+ @db.mark(id)
501
+ yield(obj.myself) if block_given?
502
+ # Push the IDs of all unmarked referenced objects onto the stack
503
+ obj._referenced_object_ids.each do |r_id|
504
+ stack << r_id unless @db.is_marked?(r_id)
505
+ end
506
506
  end
507
507
  end
508
508
  end
@@ -510,7 +510,7 @@ module PEROBS
510
510
  # Rename classes of objects stored in the data base.
511
511
  # @param rename_map [Hash] Hash that maps the old name to the new name
512
512
  def rename_classes(rename_map)
513
- @class_map.rename(rename_map)
513
+ @lock.synchronize { @class_map.rename(rename_map) }
514
514
  end
515
515
 
516
516
  # Internal method. Don't use this outside of this library!
@@ -518,14 +518,16 @@ module PEROBS
518
518
  # random numbers between 0 and 2**64 - 1.
519
519
  # @return [Integer]
520
520
  def _new_id
521
- begin
522
- # Generate a random number. It's recommended to not store more than
523
- # 2**62 objects in the same store.
524
- id = rand(2**64)
525
- # Ensure that we don't have already another object with this ID.
526
- end while @in_memory_objects.include?(id) || @db.include?(id)
521
+ @lock.synchronize do
522
+ begin
523
+ # Generate a random number. It's recommended to not store more than
524
+ # 2**62 objects in the same store.
525
+ id = rand(2**64)
526
+ # Ensure that we don't have already another object with this ID.
527
+ end while @in_memory_objects.include?(id) || @db.include?(id)
527
528
 
528
- id
529
+ id
530
+ end
529
531
  end
530
532
 
531
533
  # Internal method. Don't use this outside of this library!
@@ -536,16 +538,18 @@ module PEROBS
536
538
  # @param obj [BasicObject] Object to register
537
539
  # @param id [Integer] object ID
538
540
  def _register_in_memory(obj, id)
539
- unless obj.is_a?(ObjectBase)
540
- PEROBS.log.fatal "You can only register ObjectBase objects"
541
- end
542
- if @in_memory_objects.include?(id)
543
- PEROBS.log.fatal "The Store::_in_memory_objects list already " +
544
- "contains an object for ID #{id}"
545
- end
541
+ @lock.synchronize do
542
+ unless obj.is_a?(ObjectBase)
543
+ PEROBS.log.fatal "You can only register ObjectBase objects"
544
+ end
545
+ if @in_memory_objects.include?(id)
546
+ PEROBS.log.fatal "The Store::_in_memory_objects list already " +
547
+ "contains an object for ID #{id}"
548
+ end
546
549
 
547
- @in_memory_objects[id] = obj.object_id
548
- @stats[:created_objects] += 1
550
+ @in_memory_objects[id] = obj.object_id
551
+ @stats[:created_objects] += 1
552
+ end
549
553
  end
550
554
 
551
555
  # Remove the object from the in-memory list. This is an internal method
@@ -553,26 +557,73 @@ module PEROBS
553
557
  # finalizer, so many restrictions apply!
554
558
  # @param id [Integer] Object ID of object to remove from the list
555
559
  def _collect(id, ruby_object_id)
556
- if @in_memory_objects[id] == ruby_object_id
560
+ # This method should only be called from the Ruby garbage collector.
561
+ # Therefore no locking is needed or even possible. The GC can kick in at
562
+ # any time and we could be anywhere in the code. So there is a small
563
+ # risk for a race here, but it should not have any serious consequences.
564
+ if @in_memory_objects && @in_memory_objects[id] == ruby_object_id
557
565
  @in_memory_objects.delete(id)
558
566
  @stats[:collected_objects] += 1
559
- elsif @zombie_objects[id] == ruby_object_id
560
- @zombie_objects.delete(id)
561
- @stats[:collected_objects] += 1
562
567
  end
563
568
  end
564
569
 
565
570
  # This method returns a Hash with some statistics about this store.
566
571
  def statistics
567
- @stats.in_memory_objects = @in_memory_objects.length
568
- @stats.root_objects = @root_objects.length
569
- @stats.zombie_objects = @zombie_objects.length
572
+ @lock.synchronize do
573
+ @stats.in_memory_objects = @in_memory_objects.length
574
+ @stats.root_objects = @root_objects.length
575
+ end
570
576
 
571
577
  @stats
572
578
  end
573
579
 
574
580
  private
575
581
 
582
+ def object_by_id_internal(id)
583
+ if (ruby_object_id = @in_memory_objects[id])
584
+ # We have the object in memory so we can just return it.
585
+ begin
586
+ object = ObjectSpace._id2ref(ruby_object_id)
587
+ # Let's make sure the object is really the object we are looking
588
+ # for. The GC might have recycled it already and the Ruby object ID
589
+ # could now be used for another object.
590
+ if object.is_a?(ObjectBase) && object._id == id
591
+ return object
592
+ end
593
+ rescue RangeError => e
594
+ # Due to a race condition the object can still be in the
595
+ # @in_memory_objects list but has been collected already by the Ruby
596
+ # GC. The _collect() call has not been completed yet. We now have to
597
+ # wait until this has been done. I think the GC lock will prevent a
598
+ # race on @in_memory_objects.
599
+ GC.start
600
+ while @in_memory_objects.include?(id)
601
+ sleep 0.01
602
+ end
603
+ end
604
+ end
605
+
606
+ # This is just a safety check. It has never triggered, so we can disable
607
+ # it for now.
608
+ #if (obj = @cache.object_by_id(id))
609
+ # PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in " +
610
+ # "cache but not in_memory"
611
+ #end
612
+
613
+ # We don't have the object in memory. Let's find it in the storage.
614
+ if @db.include?(id)
615
+ # Great, object found. Read it into memory and return it.
616
+ obj = ObjectBase::read(self, id)
617
+ # Add the object to the in-memory storage list.
618
+ @cache.cache_read(obj)
619
+
620
+ return obj
621
+ end
622
+
623
+ # The requested object does not exist. Return nil.
624
+ nil
625
+ end
626
+
576
627
  # Mark phase of a mark-and-sweep garbage collector. It will mark all
577
628
  # objects that are reachable from the root objects.
578
629
  def mark
@@ -599,6 +650,7 @@ module PEROBS
599
650
  @stats.swept_objects = @db.delete_unmarked_objects do |id|
600
651
  @cache.evict(id)
601
652
  end
653
+ @db.clear_marks
602
654
  GC.start
603
655
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
604
656
  @stats.swept_objects
@@ -1,4 +1,4 @@
1
1
  module PEROBS
2
2
  # The version number
3
- VERSION = "4.2.0"
3
+ VERSION = "4.5.0"
4
4
  end
data/lib/perobs.rb CHANGED
@@ -28,3 +28,4 @@
28
28
  require "perobs/version"
29
29
  require 'perobs/Store'
30
30
  require 'perobs/ConsoleProgressMeter'
31
+ require 'perobs/FuzzyStringMatcher'
data/perobs.gemspec CHANGED
@@ -20,5 +20,5 @@ GEM_SPEC = Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency 'bundler', '~> 2.3'
22
22
  spec.add_development_dependency 'yard', '~>0.9.12'
23
- spec.add_development_dependency 'rake', '~> 12.3.3'
23
+ spec.add_development_dependency 'rake', '~> 13.0.3'
24
24
  end
@@ -265,5 +265,35 @@ describe PEROBS::FlatFileDB do
265
265
  db.close
266
266
  end
267
267
 
268
+ it 'should handle duplicate entries for the same ID in database.blobs file' do
269
+ @store.exit
270
+
271
+ db = PEROBS::FlatFileDB.new(@db_dir)
272
+ db_file = File.join(@db_dir, 'database.blobs')
273
+ db.open
274
+ 0.upto(5) do |i|
275
+ db.put_object("#{i + 1}:#{'X' * (i + 1) * 30}$", i + 1)
276
+ end
277
+ db.close
278
+
279
+ # This appends the entry 2 again
280
+ blob2 = File.read(db_file, 319 - 199, 199)
281
+ File.write(db_file, blob2, File.size(db_file))
282
+
283
+ db.open
284
+ expect(db.check_db).to eql(2)
285
+ expect(db.check_db(true)).to eql(1)
286
+ db.close
287
+ db = PEROBS::FlatFileDB.new(@db_dir, { :log => $stderr,
288
+ :log_level => Logger::WARN })
289
+ db.open
290
+ expect(db.check_db).to eql(0)
291
+
292
+ 0.upto(5) do |i|
293
+ expect(db.get_object(i + 1)).to eql("#{i + 1}:#{'X' * (i + 1) * 30}$")
294
+ end
295
+ db.close
296
+ end
297
+
268
298
  end
269
299
 
@@ -29,13 +29,25 @@ require 'perobs/FuzzyStringMatcher'
29
29
 
30
30
  module PEROBS
31
31
 
32
+ class WordRef < PEROBS::Object
33
+
34
+ attr_persist :word, :line
35
+
36
+ def initialize(store, word, line)
37
+ super(store)
38
+ self.word = word
39
+ self.line = line
40
+ end
41
+
42
+ end
43
+
32
44
  describe FuzzyStringMatcher do
33
45
 
34
46
  before(:all) do
35
47
  @db_name = generate_db_name(__FILE__)
36
48
  @store = PEROBS::Store.new(@db_name)
37
- @fsm = FuzzyStringMatcher.new(@store, 'test')
38
- @fsm2 = FuzzyStringMatcher.new(@store, 'test', true, 2)
49
+ @store['fsm'] = @fsm = @store.new(FuzzyStringMatcher)
50
+ @store['fsm2'] = @fsm2 = @store.new(FuzzyStringMatcher, true, 2)
39
51
  end
40
52
 
41
53
  after(:all) do
@@ -103,6 +115,44 @@ module PEROBS
103
115
  expect(@fsm.best_matches('foobar')).to eql([])
104
116
  end
105
117
 
118
+ it 'should find a match' do
119
+ dut = {
120
+ [ 'one' ] => [ [ 'one', 1.0 ] ],
121
+ [ 'three' ] => [ [ 'three', 1.0 ] ],
122
+ [ 'four' ]=> [ [ 'four', 1.0 ], [ 'fourteen', 0.666 ] ],
123
+ [ 'four', 1.0 ]=> [ [ 'four', 1.0 ] ],
124
+ [ 'even' ] => [ [ 'seven', 0.666 ], [ 'eleven', 0.666 ] ],
125
+ [ 'teen' ] => [ ['thirteen', 0.6666666666666666],
126
+ ['fourteen', 0.6666666666666666],
127
+ ['fifteen', 0.6666666666666666],
128
+ ['sixteen', 0.6666666666666666],
129
+ ['seventeen', 0.6666666666666666],
130
+ ['eighteen', 0.6666666666666666],
131
+ ['nineteen', 0.6666666666666666] ],
132
+ [ 'aight' ] => [ [ 'eight', 0.5 ] ],
133
+ [ 'thirdteen' ] => [ [ 'thirteen', 0.5 ] ],
134
+ [ 'shirt teen', 0.3 ] => [ [ 'thirteen', 0.333 ] ]
135
+ }
136
+ check_data_under_test(@fsm, dut)
137
+ end
138
+
139
+ it 'should sort best to worst matches' do
140
+ @fsm.clear
141
+ %w( xbar xfoox foor bar foobar barfoo foo rab baar fool xbarx
142
+ foobarx xfoobarx foo_bar ).each do |w|
143
+ @fsm.learn(w, w)
144
+ end
145
+ dut = {
146
+ [ 'foo' ] => [["foo", 1.0], ["foor", 0.5], ["foobar", 0.5],
147
+ ["fool", 0.5], ["foobarx", 0.5], ["foo_bar", 0.5],
148
+ ["barfoo", 0.5]],
149
+ [ 'bar' ] => [["bar", 1.0], ["barfoo", 0.5], ["xbar", 0.5],
150
+ ["foobar", 0.5], ["foo_bar", 0.5]],
151
+ [ 'foobar' ] => [["foobar", 1.0], ["foobarx", 0.8], ["xfoobarx", 0.6]]
152
+ }
153
+ check_data_under_test(@fsm, dut)
154
+ end
155
+
106
156
  it 'should handle a larger text' do
107
157
  text =<<-EOT
108
158
  MIT License
@@ -131,9 +181,9 @@ EOT
131
181
  @fsm2.learn(word, word)
132
182
  end
133
183
  stats = @fsm2.stats
134
- expect(stats['dictionary_size']).to eql(363)
184
+ expect(stats['dictionary_size']).to eql(352)
135
185
  expect(stats['max_list_size']).to eql(22)
136
- expect(stats['avg_list_size']).to be_within(0.001).of(2.366)
186
+ expect(stats['avg_list_size']).to be_within(0.001).of(2.409)
137
187
  end
138
188
 
139
189
  it 'should find case sensitive matches' do
@@ -145,6 +195,46 @@ EOT
145
195
  check_data_under_test(@fsm2, dut)
146
196
  end
147
197
 
198
+ it 'should support references to PEROBS objects' do
199
+ text =<<-EOT
200
+ MIT License
201
+
202
+ Permission is hereby granted, free of charge, to any person obtaining
203
+ a copy of this software and associated documentation files (the
204
+ "Software"), to deal in the Software without restriction, including
205
+ without limitation the rights to use, copy, modify, merge, publish,
206
+ distribute, sublicense, and/or sell copies of the Software, and to
207
+ permit persons to whom the Software is furnished to do so, subject to
208
+ the following conditions:
209
+ EOT
210
+
211
+ line_no = 1
212
+ @store['fsm'] = fsm = @store.new(FuzzyStringMatcher)
213
+ @store['refs'] = refs = @store.new(Array)
214
+ text.each_line do |line|
215
+ line.split.each do |word|
216
+ ref = @store.new(WordRef, word, line_no)
217
+ refs << ref
218
+ fsm.learn(word, ref)
219
+ end
220
+ line_no += 1
221
+ end
222
+
223
+ found_lines = []
224
+ fsm.best_matches('SOFTWARE').each do |match|
225
+ found_lines << match[0].line
226
+ end
227
+ expect(found_lines.sort).to eql([ 4, 5, 5, 7, 8 ])
228
+ end
229
+
230
+ it 'should with small search words' do
231
+ @fsm.clear
232
+ mats = 'Yukihiro Matsumoto'
233
+ @fsm.learn(mats)
234
+ expect(@fsm.best_matches('Yukihiro').first.first).to eql(mats)
235
+ expect(@fsm.best_matches('Mats', 0.3).first.first).to eql(mats)
236
+ end
237
+
148
238
  def check_data_under_test(fsm, dut)
149
239
  dut.each do |inputs, reference|
150
240
  key = inputs[0]