text 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/text/version.rb +1 -1
- data/lib/text/white_similarity.rb +7 -6
- data/test/test_white_similarity.rb +9 -0
- metadata +2 -2
data/lib/text/version.rb
CHANGED
@@ -34,23 +34,24 @@ module Text
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def similarity(str1, str2)
|
37
|
-
pairs1 = word_letter_pairs(str1)
|
38
|
-
pairs2 = word_letter_pairs(str2)
|
37
|
+
pairs1, length1 = word_letter_pairs(str1)
|
38
|
+
pairs2, length2 = word_letter_pairs(str2)
|
39
39
|
|
40
40
|
intersection = pairs1.inject(0) { |acc, pair|
|
41
41
|
pairs2.include?(pair) ? acc + 1 : acc
|
42
42
|
}
|
43
|
-
union = pairs1.length + pairs2.length
|
43
|
+
union = length1 + length2
|
44
44
|
|
45
45
|
(2.0 * intersection) / union
|
46
46
|
end
|
47
47
|
|
48
48
|
private
|
49
49
|
def word_letter_pairs(str)
|
50
|
-
@word_letter_pairs[str] ||= Set.new(
|
51
|
-
str.upcase.split(/\s+/).map{ |word|
|
52
|
-
(0 ... (word.length - 1)).map { |i| word[i, 2] }
|
50
|
+
@word_letter_pairs[str] ||= (
|
51
|
+
pairs = str.upcase.split(/\s+/).map{ |word|
|
52
|
+
(0 ... (word.length - 1)).map { |i| word[i, 2] }
|
53
53
|
}.flatten
|
54
|
+
[Set.new(pairs), pairs.length]
|
54
55
|
)
|
55
56
|
end
|
56
57
|
end
|
@@ -26,4 +26,13 @@ class WhiteSimilarityTest < Test::Unit::TestCase
|
|
26
26
|
assert_in_delta 0.25, white.similarity(word, "Help"), 0.01
|
27
27
|
assert_in_delta 0.0, white.similarity(word, "Sold"), 0.01
|
28
28
|
end
|
29
|
+
|
30
|
+
def test_similarity_with_examples_from_article
|
31
|
+
assert_in_delta 0.4, Text::WhiteSimilarity.similarity("GGGGG", "GG"), 0.01
|
32
|
+
assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"), 0.01
|
33
|
+
assert_in_delta 0.0, Text::WhiteSimilarity.similarity("FRANCE", "QUEBEC"), 0.01
|
34
|
+
assert_in_delta 0.72, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF FRANCE"), 0.01
|
35
|
+
assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"), 0.01
|
36
|
+
end
|
37
|
+
|
29
38
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2011-
|
14
|
+
date: 2011-12-13 00:00:00.000000000 Z
|
15
15
|
dependencies: []
|
16
16
|
description: ! 'A collection of text algorithms: Levenshtein, Soundex, Metaphone,
|
17
17
|
Double Metaphone, Figlet, Porter Stemming'
|