text 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ module Text
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
4
  MINOR = 0
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -34,23 +34,24 @@ module Text
34
34
  end
35
35
 
36
36
  def similarity(str1, str2)
37
- pairs1 = word_letter_pairs(str1)
38
- pairs2 = word_letter_pairs(str2)
37
+ pairs1, length1 = word_letter_pairs(str1)
38
+ pairs2, length2 = word_letter_pairs(str2)
39
39
 
40
40
  intersection = pairs1.inject(0) { |acc, pair|
41
41
  pairs2.include?(pair) ? acc + 1 : acc
42
42
  }
43
- union = pairs1.length + pairs2.length
43
+ union = length1 + length2
44
44
 
45
45
  (2.0 * intersection) / union
46
46
  end
47
47
 
48
48
  private
49
49
  def word_letter_pairs(str)
50
- @word_letter_pairs[str] ||= Set.new(
51
- str.upcase.split(/\s+/).map{ |word|
52
- (0 ... (word.length - 1)).map { |i| str[i, 2] }
50
+ @word_letter_pairs[str] ||= (
51
+ pairs = str.upcase.split(/\s+/).map{ |word|
52
+ (0 ... (word.length - 1)).map { |i| word[i, 2] }
53
53
  }.flatten
54
+ [Set.new(pairs), pairs.length]
54
55
  )
55
56
  end
56
57
  end
@@ -26,4 +26,13 @@ class WhiteSimilarityTest < Test::Unit::TestCase
26
26
  assert_in_delta 0.25, white.similarity(word, "Help"), 0.01
27
27
  assert_in_delta 0.0, white.similarity(word, "Sold"), 0.01
28
28
  end
29
+
30
+ def test_similarity_with_examples_from_article
31
+ assert_in_delta 0.4, Text::WhiteSimilarity.similarity("GGGGG", "GG"), 0.01
32
+ assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"), 0.01
33
+ assert_in_delta 0.0, Text::WhiteSimilarity.similarity("FRANCE", "QUEBEC"), 0.01
34
+ assert_in_delta 0.72, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF FRANCE"), 0.01
35
+ assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"), 0.01
36
+ end
37
+
29
38
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2011-11-22 00:00:00.000000000Z
14
+ date: 2011-12-13 00:00:00.000000000 Z
15
15
  dependencies: []
16
16
  description: ! 'A collection of text algorithms: Levenshtein, Soundex, Metaphone,
17
17
  Double Metaphone, Figlet, Porter Stemming'