text 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@ module Text
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
4
  MINOR = 0
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -34,23 +34,24 @@ module Text
34
34
  end
35
35
 
36
36
  def similarity(str1, str2)
37
- pairs1 = word_letter_pairs(str1)
38
- pairs2 = word_letter_pairs(str2)
37
+ pairs1, length1 = word_letter_pairs(str1)
38
+ pairs2, length2 = word_letter_pairs(str2)
39
39
 
40
40
  intersection = pairs1.inject(0) { |acc, pair|
41
41
  pairs2.include?(pair) ? acc + 1 : acc
42
42
  }
43
- union = pairs1.length + pairs2.length
43
+ union = length1 + length2
44
44
 
45
45
  (2.0 * intersection) / union
46
46
  end
47
47
 
48
48
  private
49
49
  def word_letter_pairs(str)
50
- @word_letter_pairs[str] ||= Set.new(
51
- str.upcase.split(/\s+/).map{ |word|
52
- (0 ... (word.length - 1)).map { |i| str[i, 2] }
50
+ @word_letter_pairs[str] ||= (
51
+ pairs = str.upcase.split(/\s+/).map{ |word|
52
+ (0 ... (word.length - 1)).map { |i| word[i, 2] }
53
53
  }.flatten
54
+ [Set.new(pairs), pairs.length]
54
55
  )
55
56
  end
56
57
  end
@@ -26,4 +26,13 @@ class WhiteSimilarityTest < Test::Unit::TestCase
26
26
  assert_in_delta 0.25, white.similarity(word, "Help"), 0.01
27
27
  assert_in_delta 0.0, white.similarity(word, "Sold"), 0.01
28
28
  end
29
+
30
+ def test_similarity_with_examples_from_article
31
+ assert_in_delta 0.4, Text::WhiteSimilarity.similarity("GGGGG", "GG"), 0.01
32
+ assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"), 0.01
33
+ assert_in_delta 0.0, Text::WhiteSimilarity.similarity("FRANCE", "QUEBEC"), 0.01
34
+ assert_in_delta 0.72, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF FRANCE"), 0.01
35
+ assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"), 0.01
36
+ end
37
+
29
38
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2011-11-22 00:00:00.000000000Z
14
+ date: 2011-12-13 00:00:00.000000000 Z
15
15
  dependencies: []
16
16
  description: ! 'A collection of text algorithms: Levenshtein, Soundex, Metaphone,
17
17
  Double Metaphone, Figlet, Porter Stemming'