text 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/text/version.rb CHANGED
@@ -2,7 +2,7 @@ module Text
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
4
  MINOR = 0
5
- TINY = 2
5
+ TINY = 3
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -34,25 +34,28 @@ module Text
34
34
  end
35
35
 
36
36
  def similarity(str1, str2)
37
- pairs1, length1 = word_letter_pairs(str1)
38
- pairs2, length2 = word_letter_pairs(str2)
37
+ pairs1 = word_letter_pairs(str1)
38
+ pairs2 = word_letter_pairs(str2).dup
39
39
 
40
- intersection = pairs1.inject(0) { |acc, pair|
41
- pairs2.include?(pair) ? acc + 1 : acc
42
- }
43
- union = length1 + length2
40
+ union = pairs1.count + pairs2.count
41
+
42
+ intersection = 0
43
+ pairs1.each_with_index do |pair1|
44
+ if index = pairs2.index(pair1)
45
+ intersection += 1
46
+ pairs2.delete_at(index)
47
+ end
48
+ end
44
49
 
45
50
  (2.0 * intersection) / union
46
51
  end
47
52
 
48
53
  private
49
54
  def word_letter_pairs(str)
50
- @word_letter_pairs[str] ||= (
51
- pairs = str.upcase.split(/\s+/).map{ |word|
55
+ @word_letter_pairs[str] ||=
56
+ str.upcase.split(/\s+/).map{ |word|
52
57
  (0 ... (word.length - 1)).map { |i| word[i, 2] }
53
- }.flatten
54
- [Set.new(pairs), pairs.length]
55
- )
58
+ }.flatten.freeze
56
59
  end
57
60
  end
58
61
  end
@@ -27,6 +27,12 @@ class WhiteSimilarityTest < Test::Unit::TestCase
27
27
  assert_in_delta 0.0, white.similarity(word, "Sold"), 0.01
28
28
  end
29
29
 
30
+ def test_should_not_clobber_cached_values
31
+ white = Text::WhiteSimilarity.new
32
+ word = "Healed"
33
+ assert_equal white.similarity(word, word), white.similarity(word, word)
34
+ end
35
+
30
36
  def test_similarity_with_examples_from_article
31
37
  assert_in_delta 0.4, Text::WhiteSimilarity.similarity("GGGGG", "GG"), 0.01
32
38
  assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"), 0.01
@@ -35,4 +41,9 @@ class WhiteSimilarityTest < Test::Unit::TestCase
35
41
  assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"), 0.01
36
42
  end
37
43
 
44
+ def test_similarity_with_equal_strings
45
+ assert_equal 1.0, Text::WhiteSimilarity.similarity("aaaaa", "aaaaa")
46
+ assert_equal 1.0, Text::WhiteSimilarity.similarity("REPUBLIC OF CUBA", "REPUBLIC OF CUBA")
47
+ end
48
+
38
49
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2011-12-13 00:00:00.000000000 Z
14
+ date: 2012-01-09 00:00:00.000000000 Z
15
15
  dependencies: []
16
16
  description: ! 'A collection of text algorithms: Levenshtein, Soundex, Metaphone,
17
17
  Double Metaphone, Figlet, Porter Stemming'