text 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 990ce640247dc172ad5e84a54bec8349ccdf55bb
4
- data.tar.gz: 25c46a8bfe4fd410959be538b10b2a288e70cbcb
3
+ metadata.gz: 84969e89321dfeb30067b4b526928330964281a5
4
+ data.tar.gz: 79da28e5584e6ecd0d35c18cafb5bbd5a1e46989
5
5
  SHA512:
6
- metadata.gz: 6d506f2ae153149a6ebd288ce61a540931fd5d944bc661a582ba8a1fffbdaaf454873e690308202826127d8849c3ec8892969100ad224c51cf61aa5d20fde48c
7
- data.tar.gz: 5ffcc3933066fb5131f7da6b438f89fec87d047bc2703e5ad7b4d0ed3a433c4de8c1862c66b8ff7b6dbdcb7aa7e9646c9f8594cd3e6a0630e7a25b0b7bf8e7fb
6
+ metadata.gz: 49a658384d4f666da44a3bfea41c44632fa7a9b444ae132c079c87ac236be27ce34fb0f38c320c992aba51068c621c073f9fa7c184d883019921ea2710a6359d
7
+ data.tar.gz: cc04c2a8736339afd408514853df8fc0d8dd298be0adbbc2c6b47d0aaeceb31a534be8ca395efc34221933c7a4894cd5a5724a5da81b2cf8f14302ad178b9dd1
@@ -36,14 +36,15 @@ module Levenshtein
36
36
 
37
37
  private
38
38
  def distance_with_maximum(str1, str2, max_distance) # :nodoc:
39
- s, t = [str1, str2].sort_by(&:length).
40
- map{ |str| str.encode(Encoding::UTF_8).unpack("U*") }
39
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
40
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
41
+
41
42
  n = s.length
42
43
  m = t.length
43
44
  big_int = n * m
44
- return m if n.zero?
45
- return n if m.zero?
46
- return 0 if s == t
45
+
46
+ # Swap if necessary so that s is always the shorter of the two strings
47
+ s, t, n, m = t, s, m, n if m < n
47
48
 
48
49
  # If the length difference is already greater than the max_distance, then
49
50
  # there is nothing else to check
@@ -51,6 +52,10 @@ private
51
52
  return max_distance
52
53
  end
53
54
 
55
+ return 0 if s == t
56
+ return m if n.zero?
57
+ return n if m.zero?
58
+
54
59
  # The values necessary for our threshold are written; the ones after must
55
60
  # be filled with large integers since the trailing member of the threshold
56
61
  # window in the bottom array will run min across them
@@ -84,10 +89,12 @@ private
84
89
  # computer science and computational biology.
85
90
  # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8.
86
91
  # pp. 263–264.
87
- min = [0, i - max_distance - 1].max
88
- max = [m - 1, i + max_distance].min
92
+ min = i - max_distance - 1
93
+ min = 0 if min < 0
94
+ max = i + max_distance
95
+ max = m - 1 if max > m - 1
89
96
 
90
- (min .. max).each do |j|
97
+ min.upto(max) do |j|
91
98
  # If the diagonal value is already greater than the max_distance
92
99
  # then we can safely return: the diagonal will never go lower again.
93
100
  # See: http://www.levenshtein.net/
@@ -96,11 +103,11 @@ private
96
103
  end
97
104
 
98
105
  cost = s[i] == t[j] ? 0 : 1
99
- x = [
100
- d[j+1] + 1, # insertion
101
- e + 1, # deletion
102
- d[j] + cost # substitution
103
- ].min
106
+ insertion = d[j + 1] + 1
107
+ deletion = e + 1
108
+ substitution = d[j] + cost
109
+ x = insertion < deletion ? insertion : deletion
110
+ x = substitution if substitution < x
104
111
 
105
112
  d[j] = e
106
113
  e = x
@@ -116,9 +123,12 @@ private
116
123
  end
117
124
 
118
125
  def distance_without_maximum(str1, str2) # :nodoc:
119
- s, t = [str1, str2].map{ |str| str.encode(Encoding::UTF_8).unpack("U*") }
126
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
127
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
128
+
120
129
  n = s.length
121
130
  m = t.length
131
+
122
132
  return m if n.zero?
123
133
  return n if m.zero?
124
134
 
@@ -128,12 +138,13 @@ private
128
138
  n.times do |i|
129
139
  e = i + 1
130
140
  m.times do |j|
131
- cost = (s[i] == t[j]) ? 0 : 1
132
- x = [
133
- d[j+1] + 1, # insertion
134
- e + 1, # deletion
135
- d[j] + cost # substitution
136
- ].min
141
+ cost = s[i] == t[j] ? 0 : 1
142
+ insertion = d[j + 1] + 1
143
+ deletion = e + 1
144
+ substitution = d[j] + cost
145
+ x = insertion < deletion ? insertion : deletion
146
+ x = substitution if substitution < x
147
+
137
148
  d[j] = e
138
149
  e = x
139
150
  end
@@ -2,7 +2,7 @@ module Text
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
4
  MINOR = 3
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -48,6 +48,12 @@ class LevenshteinTest < Test::Unit::TestCase
48
48
  assert_equal 3, distance("kitten", "sitting", 4)
49
49
  end
50
50
 
51
+ def test_should_return_calculated_distance_when_less_than_maximum_for_empty_strings
52
+ assert_equal 3, distance("", "cat", 4)
53
+ assert_equal 3, distance("cat", "", 5)
54
+ assert_equal 0, distance("", "", 2)
55
+ end
56
+
51
57
  def test_should_return_calculated_distance_when_same_as_maximum
52
58
  assert_equal 0, distance("test", "test", 0)
53
59
  assert_equal 1, distance("test", "tent", 1)
@@ -55,12 +61,23 @@ class LevenshteinTest < Test::Unit::TestCase
55
61
  assert_equal 3, distance("kitten", "sitting", 3)
56
62
  end
57
63
 
64
+ def test_should_return_calculated_distance_when_same_as_maximum_for_empty_strings
65
+ assert_equal 3, distance("", "cat", 3)
66
+ assert_equal 3, distance("cat", "", 3)
67
+ assert_equal 0, distance("", "", 0)
68
+ end
69
+
58
70
  def test_should_return_specified_maximum_if_distance_is_more
59
71
  assert_equal 1, distance("gumbo", "gambol", 1)
60
72
  assert_equal 2, distance("kitten", "sitting", 2)
61
73
  assert_equal 1, distance("test", "tasf", 1)
62
74
  end
63
75
 
76
+ def test_should_return_specified_maximum_if_distance_is_more_for_empty_strings
77
+ assert_equal 2, distance("kitten", "", 2)
78
+ assert_equal 3, distance("", "kitten", 3)
79
+ end
80
+
64
81
  def test_should_return_maximum_distance_for_strings_with_additions_at_start
65
82
  assert_equal 1, distance("1234", "01234")
66
83
  assert_equal 0, distance("1234", "01234", 0)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Battley
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-06-23 00:00:00.000000000 Z
13
+ date: 2015-04-13 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake
@@ -80,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
80
80
  version: '0'
81
81
  requirements: []
82
82
  rubyforge_project: text
83
- rubygems_version: 2.2.2
83
+ rubygems_version: 2.4.5
84
84
  signing_key:
85
85
  specification_version: 4
86
86
  summary: A collection of text algorithms