jaro 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/jaro.rb +16 -5
- metadata +2 -2
data/jaro.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
class String
|
2
|
-
# Jaro distance
|
4
|
+
# Jaro-Winkler distance
|
5
|
+
# @param [String] other string
|
6
|
+
# @return [Float] distance, normalized between 0.0 (no match) and 1.0 (perfect match)
|
3
7
|
def ^(other)
|
4
|
-
return 0 if self
|
8
|
+
return 1.0 if self == other
|
9
|
+
return 0.0 if self.empty? or other.empty?
|
10
|
+
|
5
11
|
s1 = self.codepoints.to_a
|
6
12
|
s2 = other.codepoints.to_a
|
7
13
|
s1, s2 = s2, s1 if s1.size > s2.size
|
@@ -10,7 +16,7 @@ class String
|
|
10
16
|
m, t = 0.0, 0
|
11
17
|
max_dist = s2s/2 - 1
|
12
18
|
|
13
|
-
m1 = Array.new(s1s,
|
19
|
+
m1 = Array.new(s1s, -1)
|
14
20
|
m2 = Array.new(s2s, false)
|
15
21
|
|
16
22
|
# find m
|
@@ -25,12 +31,14 @@ class String
|
|
25
31
|
end
|
26
32
|
end
|
27
33
|
end
|
34
|
+
|
35
|
+
return 0.0 if m.zero?
|
28
36
|
|
29
37
|
m1.reduce do |a, b|
|
30
38
|
# if either a or b are nil, that means there was no match
|
31
39
|
# if a > b, that means the previous value is greater than the current
|
32
40
|
# which means it went down
|
33
|
-
if a
|
41
|
+
if a > -1 and b > -1 and a > b
|
34
42
|
t += (a-b > 1 ? 1 : 2)
|
35
43
|
end
|
36
44
|
b
|
@@ -41,7 +49,10 @@ class String
|
|
41
49
|
# winkler adjustment
|
42
50
|
l = 0
|
43
51
|
for i in 0..3
|
44
|
-
s1[i]
|
52
|
+
if s1[i] != s2[i]
|
53
|
+
l = i
|
54
|
+
break
|
55
|
+
end
|
45
56
|
end
|
46
57
|
|
47
58
|
# standard weight (p) for winkler == 0.1
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jaro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-29 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Adds String#^ for measuring string similarity
|
15
15
|
email: moshee@displaynone.us
|