RubyGems - text - Versions diffs - 1.0.2 → 1.0.3 - Mend

text 1.0.2 → 1.0.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

data/lib/text/version.rb +1 -1
data/lib/text/white_similarity.rb +14 -11
data/test/test_white_similarity.rb +11 -0
metadata +2 -2

data/lib/text/version.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Text
   module VERSION #:nodoc:
     MAJOR = 1
     MINOR = 0
-    TINY  = 2
+    TINY  = 3
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/lib/text/white_similarity.rb CHANGED Viewed

@@ -34,25 +34,28 @@ module Text
     end
     def similarity(str1, str2)
-      pairs1, length1 = word_letter_pairs(str1)
-      pairs2, length2 = word_letter_pairs(str2)
+      pairs1 = word_letter_pairs(str1)
+      pairs2 = word_letter_pairs(str2).dup
-      intersection = pairs1.inject(0) { |acc, pair|
-        pairs2.include?(pair) ? acc + 1 : acc
-      }
-      union = length1 + length2
+      union = pairs1.count + pairs2.count
+      intersection = 0
+      pairs1.each_with_index do |pair1|
+        if index = pairs2.index(pair1)
+          intersection += 1
+          pairs2.delete_at(index)
+        end
+      end
       (2.0 * intersection) / union
     end
   private
     def word_letter_pairs(str)
-      @word_letter_pairs[str] ||= (
-        pairs = str.upcase.split(/\s+/).map{ |word|
+      @word_letter_pairs[str] ||=
+        str.upcase.split(/\s+/).map{ |word|
           (0 ... (word.length - 1)).map { |i| word[i, 2] }
-        }.flatten
-        [Set.new(pairs), pairs.length]
-      )
+        }.flatten.freeze
     end
   end
 end

data/test/test_white_similarity.rb CHANGED Viewed

@@ -27,6 +27,12 @@ class WhiteSimilarityTest < Test::Unit::TestCase
     assert_in_delta 0.0,  white.similarity(word, "Sold"),    0.01
   end
+  def test_should_not_clobber_cached_values
+    white = Text::WhiteSimilarity.new
+    word = "Healed"
+    assert_equal white.similarity(word, word), white.similarity(word, word)
+  end
   def test_similarity_with_examples_from_article
     assert_in_delta 0.4,  Text::WhiteSimilarity.similarity("GGGGG", "GG"),                           0.01
     assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"),          0.01
@@ -35,4 +41,9 @@ class WhiteSimilarityTest < Test::Unit::TestCase
     assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"),   0.01
   end
+  def test_similarity_with_equal_strings
+    assert_equal 1.0, Text::WhiteSimilarity.similarity("aaaaa", "aaaaa")
+    assert_equal 1.0, Text::WhiteSimilarity.similarity("REPUBLIC OF CUBA", "REPUBLIC OF CUBA")
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: text
 version: !ruby/object:Gem::Version
-  version: 1.0.2
+  version: 1.0.3
   prerelease:
 platform: ruby
 authors:
@@ -11,7 +11,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-12-13 00:00:00.000000000 Z
+date: 2012-01-09 00:00:00.000000000 Z
 dependencies: []
 description: ! 'A collection of text algorithms: Levenshtein, Soundex, Metaphone,
   Double Metaphone, Figlet, Porter Stemming'