jaro 0.0.2 → 0.0.3

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
Files changed (2)
  1. data/jaro.rb +16 -5
  2. metadata +2 -2
data/jaro.rb CHANGED
@@ -1,7 +1,13 @@
1
+ # encoding: utf-8
2
+
1
3
  class String
2
- # Jaro distance
4
+ # Jaro-Winkler distance
5
+ # @param [String] other string
6
+ # @return [Float] distance, normalized between 0.0 (no match) and 1.0 (perfect match)
3
7
  def ^(other)
4
- return 0 if self.empty? or other.empty?
8
+ return 1.0 if self == other
9
+ return 0.0 if self.empty? or other.empty?
10
+
5
11
  s1 = self.codepoints.to_a
6
12
  s2 = other.codepoints.to_a
7
13
  s1, s2 = s2, s1 if s1.size > s2.size
@@ -10,7 +16,7 @@ class String
10
16
  m, t = 0.0, 0
11
17
  max_dist = s2s/2 - 1
12
18
 
13
- m1 = Array.new(s1s, nil)
19
+ m1 = Array.new(s1s, -1)
14
20
  m2 = Array.new(s2s, false)
15
21
 
16
22
  # find m
@@ -25,12 +31,14 @@ class String
25
31
  end
26
32
  end
27
33
  end
34
+
35
+ return 0.0 if m.zero?
28
36
 
29
37
  m1.reduce do |a, b|
30
38
  # if either a or b is the no-match sentinel (nil in 0.0.2, -1 in 0.0.3), there was no match
31
39
  # if a > b, that means the previous value is greater than the current
32
40
  # which means it went down
33
- if a != nil and b != nil and a > b
41
+ if a > -1 and b > -1 and a > b
34
42
  t += (a-b > 1 ? 1 : 2)
35
43
  end
36
44
  b
@@ -41,7 +49,10 @@ class String
41
49
  # winkler adjustment
42
50
  l = 0
43
51
  for i in 0..3
44
- s1[i] == s2[i] ? l += 1 : break
52
+ if s1[i] != s2[i]
53
+ l = i
54
+ break
55
+ end
45
56
  end
46
57
 
47
58
  # standard weight (p) for winkler == 0.1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-28 00:00:00.000000000 Z
12
+ date: 2012-09-29 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Adds String#^ for measuring string similarity
15
15
  email: moshee@displaynone.us