text 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 990ce640247dc172ad5e84a54bec8349ccdf55bb
4
- data.tar.gz: 25c46a8bfe4fd410959be538b10b2a288e70cbcb
3
+ metadata.gz: 84969e89321dfeb30067b4b526928330964281a5
4
+ data.tar.gz: 79da28e5584e6ecd0d35c18cafb5bbd5a1e46989
5
5
  SHA512:
6
- metadata.gz: 6d506f2ae153149a6ebd288ce61a540931fd5d944bc661a582ba8a1fffbdaaf454873e690308202826127d8849c3ec8892969100ad224c51cf61aa5d20fde48c
7
- data.tar.gz: 5ffcc3933066fb5131f7da6b438f89fec87d047bc2703e5ad7b4d0ed3a433c4de8c1862c66b8ff7b6dbdcb7aa7e9646c9f8594cd3e6a0630e7a25b0b7bf8e7fb
6
+ metadata.gz: 49a658384d4f666da44a3bfea41c44632fa7a9b444ae132c079c87ac236be27ce34fb0f38c320c992aba51068c621c073f9fa7c184d883019921ea2710a6359d
7
+ data.tar.gz: cc04c2a8736339afd408514853df8fc0d8dd298be0adbbc2c6b47d0aaeceb31a534be8ca395efc34221933c7a4894cd5a5724a5da81b2cf8f14302ad178b9dd1
@@ -36,14 +36,15 @@ module Levenshtein
36
36
 
37
37
  private
38
38
  def distance_with_maximum(str1, str2, max_distance) # :nodoc:
39
- s, t = [str1, str2].sort_by(&:length).
40
- map{ |str| str.encode(Encoding::UTF_8).unpack("U*") }
39
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
40
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
41
+
41
42
  n = s.length
42
43
  m = t.length
43
44
  big_int = n * m
44
- return m if n.zero?
45
- return n if m.zero?
46
- return 0 if s == t
45
+
46
+ # Swap if necessary so that s is always the shorter of the two strings
47
+ s, t, n, m = t, s, m, n if m < n
47
48
 
48
49
  # If the length difference is already greater than the max_distance, then
49
50
  # there is nothing else to check
@@ -51,6 +52,10 @@ private
51
52
  return max_distance
52
53
  end
53
54
 
55
+ return 0 if s == t
56
+ return m if n.zero?
57
+ return n if m.zero?
58
+
54
59
  # The values necessary for our threshold are written; the ones after must
55
60
  # be filled with large integers since the trailing member of the threshold
56
61
  # window in the bottom array will run min across them
@@ -84,10 +89,12 @@ private
84
89
  # computer science and computational biology.
85
90
  # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8.
86
91
  # pp. 263–264.
87
- min = [0, i - max_distance - 1].max
88
- max = [m - 1, i + max_distance].min
92
+ min = i - max_distance - 1
93
+ min = 0 if min < 0
94
+ max = i + max_distance
95
+ max = m - 1 if max > m - 1
89
96
 
90
- (min .. max).each do |j|
97
+ min.upto(max) do |j|
91
98
  # If the diagonal value is already greater than the max_distance
92
99
  # then we can safely return: the diagonal will never go lower again.
93
100
  # See: http://www.levenshtein.net/
@@ -96,11 +103,11 @@ private
96
103
  end
97
104
 
98
105
  cost = s[i] == t[j] ? 0 : 1
99
- x = [
100
- d[j+1] + 1, # insertion
101
- e + 1, # deletion
102
- d[j] + cost # substitution
103
- ].min
106
+ insertion = d[j + 1] + 1
107
+ deletion = e + 1
108
+ substitution = d[j] + cost
109
+ x = insertion < deletion ? insertion : deletion
110
+ x = substitution if substitution < x
104
111
 
105
112
  d[j] = e
106
113
  e = x
@@ -116,9 +123,12 @@ private
116
123
  end
117
124
 
118
125
  def distance_without_maximum(str1, str2) # :nodoc:
119
- s, t = [str1, str2].map{ |str| str.encode(Encoding::UTF_8).unpack("U*") }
126
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
127
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
128
+
120
129
  n = s.length
121
130
  m = t.length
131
+
122
132
  return m if n.zero?
123
133
  return n if m.zero?
124
134
 
@@ -128,12 +138,13 @@ private
128
138
  n.times do |i|
129
139
  e = i + 1
130
140
  m.times do |j|
131
- cost = (s[i] == t[j]) ? 0 : 1
132
- x = [
133
- d[j+1] + 1, # insertion
134
- e + 1, # deletion
135
- d[j] + cost # substitution
136
- ].min
141
+ cost = s[i] == t[j] ? 0 : 1
142
+ insertion = d[j + 1] + 1
143
+ deletion = e + 1
144
+ substitution = d[j] + cost
145
+ x = insertion < deletion ? insertion : deletion
146
+ x = substitution if substitution < x
147
+
137
148
  d[j] = e
138
149
  e = x
139
150
  end
@@ -2,7 +2,7 @@ module Text
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
4
  MINOR = 3
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -48,6 +48,12 @@ class LevenshteinTest < Test::Unit::TestCase
48
48
  assert_equal 3, distance("kitten", "sitting", 4)
49
49
  end
50
50
 
51
+ def test_should_return_calculated_distance_when_less_than_maximum_for_empty_strings
52
+ assert_equal 3, distance("", "cat", 4)
53
+ assert_equal 3, distance("cat", "", 5)
54
+ assert_equal 0, distance("", "", 2)
55
+ end
56
+
51
57
  def test_should_return_calculated_distance_when_same_as_maximum
52
58
  assert_equal 0, distance("test", "test", 0)
53
59
  assert_equal 1, distance("test", "tent", 1)
@@ -55,12 +61,23 @@ class LevenshteinTest < Test::Unit::TestCase
55
61
  assert_equal 3, distance("kitten", "sitting", 3)
56
62
  end
57
63
 
64
+ def test_should_return_calculated_distance_when_same_as_maximum_for_empty_strings
65
+ assert_equal 3, distance("", "cat", 3)
66
+ assert_equal 3, distance("cat", "", 3)
67
+ assert_equal 0, distance("", "", 0)
68
+ end
69
+
58
70
  def test_should_return_specified_maximum_if_distance_is_more
59
71
  assert_equal 1, distance("gumbo", "gambol", 1)
60
72
  assert_equal 2, distance("kitten", "sitting", 2)
61
73
  assert_equal 1, distance("test", "tasf", 1)
62
74
  end
63
75
 
76
+ def test_should_return_specified_maximum_if_distance_is_more_for_empty_strings
77
+ assert_equal 2, distance("kitten", "", 2)
78
+ assert_equal 3, distance("", "kitten", 3)
79
+ end
80
+
64
81
  def test_should_return_maximum_distance_for_strings_with_additions_at_start
65
82
  assert_equal 1, distance("1234", "01234")
66
83
  assert_equal 0, distance("1234", "01234", 0)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Battley
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-06-23 00:00:00.000000000 Z
13
+ date: 2015-04-13 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake
@@ -80,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
80
80
  version: '0'
81
81
  requirements: []
82
82
  rubyforge_project: text
83
- rubygems_version: 2.2.2
83
+ rubygems_version: 2.4.5
84
84
  signing_key:
85
85
  specification_version: 4
86
86
  summary: A collection of text algorithms