text 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/text/version.rb CHANGED
@@ -2,7 +2,7 @@ module Text
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
4
  MINOR = 0
5
- TINY = 2
5
+ TINY = 3
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -34,25 +34,28 @@ module Text
34
34
  end
35
35
 
36
36
  def similarity(str1, str2)
37
- pairs1, length1 = word_letter_pairs(str1)
38
- pairs2, length2 = word_letter_pairs(str2)
37
+ pairs1 = word_letter_pairs(str1)
38
+ pairs2 = word_letter_pairs(str2).dup
39
39
 
40
- intersection = pairs1.inject(0) { |acc, pair|
41
- pairs2.include?(pair) ? acc + 1 : acc
42
- }
43
- union = length1 + length2
40
+ union = pairs1.count + pairs2.count
41
+
42
+ intersection = 0
43
+ pairs1.each_with_index do |pair1|
44
+ if index = pairs2.index(pair1)
45
+ intersection += 1
46
+ pairs2.delete_at(index)
47
+ end
48
+ end
44
49
 
45
50
  (2.0 * intersection) / union
46
51
  end
47
52
 
48
53
  private
49
54
  def word_letter_pairs(str)
50
- @word_letter_pairs[str] ||= (
51
- pairs = str.upcase.split(/\s+/).map{ |word|
55
+ @word_letter_pairs[str] ||=
56
+ str.upcase.split(/\s+/).map{ |word|
52
57
  (0 ... (word.length - 1)).map { |i| word[i, 2] }
53
- }.flatten
54
- [Set.new(pairs), pairs.length]
55
- )
58
+ }.flatten.freeze
56
59
  end
57
60
  end
58
61
  end
@@ -27,6 +27,12 @@ class WhiteSimilarityTest < Test::Unit::TestCase
27
27
  assert_in_delta 0.0, white.similarity(word, "Sold"), 0.01
28
28
  end
29
29
 
30
+ def test_should_not_clobber_cached_values
31
+ white = Text::WhiteSimilarity.new
32
+ word = "Healed"
33
+ assert_equal white.similarity(word, word), white.similarity(word, word)
34
+ end
35
+
30
36
  def test_similarity_with_examples_from_article
31
37
  assert_in_delta 0.4, Text::WhiteSimilarity.similarity("GGGGG", "GG"), 0.01
32
38
  assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"), 0.01
@@ -35,4 +41,9 @@ class WhiteSimilarityTest < Test::Unit::TestCase
35
41
  assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"), 0.01
36
42
  end
37
43
 
44
+ def test_similarity_with_equal_strings
45
+ assert_equal 1.0, Text::WhiteSimilarity.similarity("aaaaa", "aaaaa")
46
+ assert_equal 1.0, Text::WhiteSimilarity.similarity("REPUBLIC OF CUBA", "REPUBLIC OF CUBA")
47
+ end
48
+
38
49
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2011-12-13 00:00:00.000000000 Z
14
+ date: 2012-01-09 00:00:00.000000000 Z
15
15
  dependencies: []
16
16
  description: ! 'A collection of text algorithms: Levenshtein, Soundex, Metaphone,
17
17
  Double Metaphone, Figlet, Porter Stemming'