levenshtein 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG ADDED
@@ -0,0 +1,34 @@
1
+ 0.2.2 (16-03-2012)
2
+
3
+ * Simplified code.
4
+
5
+ 0.2.1 (11-03-2012)
6
+
7
+ * Better memory handling.
8
+
9
+ * Little speed improvements.
10
+
11
+ * Ruby 1.9 compatible?
12
+
13
+ 0.2.0 (11-07-2009)
14
+
15
+ * Return 0 instead of 0.0 in case of empty strings.
16
+
17
+ * Added specific support for arrays.
18
+
19
+ * Added specific support for arrays of strings.
20
+
21
+ * Added generic support for all (?) kinds of sequences.
22
+
23
+ * Moved a lot of code to the C world.
24
+
25
+ 0.1.1 (06-10-2008)
26
+
27
+ * If one of the strings was both the begin and the end of the
28
+ other string, it would be stripped from both ends. Example:
29
+ Levenshtein.distance("abracadabra", "abra") resulted in 3
30
+ instead of 7. It's fixed now.
31
+
32
+ 0.1.0 (24-05-2008)
33
+
34
+ * First release.
data/README CHANGED
@@ -1,8 +1,15 @@
1
- # The Levenshtein distance is a metric for measuring the amount of difference
2
- # between two sequences (i.e., the so called edit distance). The Levenshtein
3
- # distance between two strings is given by the minimum number of operations
4
- # needed to transform one string into the other, where an operation is an
5
- # insertion, deletion, or substitution of a single character.
6
- #
7
- # More information about the Levenshtein distance algorithm:
8
- # http://en.wikipedia.org/wiki/Levenshtein_distance .
1
+ The Levenshtein distance is a metric for measuring the amount
2
+ of difference between two sequences (i.e., the so called edit
3
+ distance). The Levenshtein distance between two sequences is
4
+ given by the minimum number of operations needed to transform
5
+ one sequence into the other, where an operation is an
6
+ insertion, deletion, or substitution of a single element.
7
+
8
+ The two sequences can be two strings, two arrays, or two other
9
+ objects responding to :each. All sequences are handled by generic
10
+ (fast) C code.
11
+
12
+ All objects in the sequences should respond to :hash and :eql?.
13
+
14
+ More information about the Levenshtein distance algorithm:
15
+ http://en.wikipedia.org/wiki/Levenshtein_distance .
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.2
@@ -2,4 +2,9 @@ require "mkmf"
2
2
 
3
3
  dir_config("levenshtein")
4
4
 
5
- create_makefile("levenshtein/levenshtein_c")
5
+ have_library("levenshtein_array")
6
+ have_library("levenshtein_array_of_strings")
7
+ have_library("levenshtein_generic")
8
+ have_library("levenshtein_string")
9
+
10
+ create_makefile("levenshtein/levenshtein_fast")
@@ -0,0 +1,13 @@
1
+ #ifdef RARRAY_PTR
2
+ #else
3
+ #define RARRAY_PTR(o) (RARRAY(o)->ptr)
4
+ #define RARRAY_LEN(o) (RARRAY(o)->len)
5
+ #endif
6
+
7
+ #ifdef RSTRING_PTR
8
+ #else
9
+ #define RSTRING_PTR(o) (RSTRING(o)->ptr)
10
+ #define RSTRING_LEN(o) (RSTRING(o)->len)
11
+ #endif
12
+
13
+ VALUE mLevenshtein;
@@ -1,25 +1,27 @@
1
1
  #include "ruby.h"
2
+ #include "levenshtein.h"
2
3
 
3
- static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VALUE rb_threshold) {
4
- VALUE rb_s3;
4
+ VALUE levenshtein_distance_fast(VALUE self, VALUE rb_o1, VALUE rb_o2, VALUE rb_threshold) {
5
+ VALUE *p1, *p2;
6
+ long l1, l2;
7
+ long col, row;
5
8
  int threshold;
6
- int l1, l2, l3;
7
- char *s1, *s2, *s3;
8
- int *prev_row, *curr_row;
9
- int col, row;
9
+ int *prev_row, *curr_row, *temp_row;
10
10
  int curr_row_min, result;
11
+ int value1, value2;
11
12
 
12
- /* Convert Ruby's s1 to C's s1. */
13
+ /* Be sure that all equivalent objects in rb_o1 and rb_o2 (a.eql?(b) == true) are taken from a pool (a.equal?(b) == true). */
14
+ /* This is done in levenshtein.rb by means of Util.pool. */
13
15
 
14
- rb_s1 = StringValue(rb_s1);
15
- s1 = RSTRING(rb_s1)->ptr;
16
- l1 = RSTRING(rb_s1)->len;
16
+ /* Get the sizes of both arrays. */
17
17
 
18
- /* Convert Ruby's s2 to C's s2. */
18
+ l1 = RARRAY_LEN(rb_o1);
19
+ l2 = RARRAY_LEN(rb_o2);
19
20
 
20
- rb_s2 = StringValue(rb_s2);
21
- s2 = RSTRING(rb_s2)->ptr;
22
- l2 = RSTRING(rb_s2)->len;
21
+ /* Get the pointers of both arrays. */
22
+
23
+ p1 = RARRAY_PTR(rb_o1);
24
+ p2 = RARRAY_PTR(rb_o2);
23
25
 
24
26
  /* Convert Ruby's threshold to C's threshold. */
25
27
 
@@ -29,7 +31,7 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
29
31
  threshold = -1;
30
32
  }
31
33
 
32
- /* The Levenshtein Algorithm itself. */
34
+ /* The Levenshtein algorithm itself. */
33
35
 
34
36
  /* s1= */
35
37
  /* ERIK */
@@ -43,15 +45,11 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
43
45
  /* T 65555 */
44
46
  /* R 76566 */
45
47
  /* A 87667 */
46
-
48
+
47
49
  /* Allocate memory for both rows */
48
50
 
49
- prev_row = ALLOC_N(int, l1+1);
50
- curr_row = ALLOC_N(int, l1+1);
51
-
52
- if ((prev_row == NULL) || (curr_row == NULL)) {
53
- rb_raise(rb_eNoMemError, "out of memory");
54
- }
51
+ prev_row = (int*) ALLOC_N(int, (l1+1));
52
+ curr_row = (int*) ALLOC_N(int, (l1+1));
55
53
 
56
54
  /* Initialize the current row. */
57
55
 
@@ -62,7 +60,9 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
62
60
  for (row=1; row<=l2; row++) {
63
61
  /* Copy the current row to the previous row. */
64
62
 
65
- memcpy(prev_row, curr_row, sizeof(int)*(l1+1));
63
+ temp_row = prev_row;
64
+ prev_row = curr_row;
65
+ curr_row = temp_row;
66
66
 
67
67
  /* Calculate the values of the current row. */
68
68
 
@@ -70,27 +70,31 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
70
70
  curr_row_min = row;
71
71
 
72
72
  for (col=1; col<=l1; col++) {
73
- /* Equal (cost=0) or Substitution (cost=1). */
73
+ /* Equal (cost=0) or substitution (cost=1). */
74
74
 
75
- curr_row[col] = prev_row[col-1] + ((s1[col-1] == s2[row-1]) ? 0 : 1);
75
+ value1 = prev_row[col-1] + ((p1[col-1] == p2[row-1]) ? 0 : 1);
76
76
 
77
77
  /* Insertion if it's cheaper than substitution. */
78
78
 
79
- if (prev_row[col]+1 < curr_row[col]) {
80
- curr_row[col] = prev_row[col]+1;
79
+ value2 = prev_row[col]+1;
80
+ if (value2 < value1) {
81
+ value1 = value2;
81
82
  }
82
83
 
83
84
  /* Deletion if it's cheaper than substitution. */
84
85
 
85
- if (curr_row[col-1]+1 < curr_row[col]) {
86
- curr_row[col] = curr_row[col-1]+1;
86
+ value2 = curr_row[col-1]+1;
87
+ if (value2 < value1) {
88
+ value1 = value2;
87
89
  }
88
90
 
89
91
  /* Keep track of the minimum value on this row. */
90
92
 
91
- if (curr_row[col] < curr_row_min) {
92
- curr_row_min = curr_row[col];
93
+ if (value1 < curr_row_min) {
94
+ curr_row_min = value1;
93
95
  }
96
+
97
+ curr_row[col] = value1;
94
98
  }
95
99
 
96
100
  /* Return nil as soon as we exceed the threshold. */
@@ -115,8 +119,8 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
115
119
  return INT2FIX(result);
116
120
  }
117
121
 
118
- void Init_levenshtein_c() {
119
- VALUE mLevenshtein = rb_define_module("Levenshtein");
122
+ void Init_levenshtein_fast() {
123
+ mLevenshtein = rb_const_get(rb_mKernel, rb_intern("Levenshtein"));
120
124
 
121
- rb_define_singleton_method(mLevenshtein, "distance_part2_fast" , levenshtein_distance_part2, 3);
125
+ rb_define_singleton_method(mLevenshtein, "distance_fast" , levenshtein_distance_fast, 3);
122
126
  }
@@ -0,0 +1,5 @@
1
+ # encoding: UTF-8
2
+
3
+ module Levenshtein
4
+ VERSION = "0.2.2"
5
+ end
data/lib/levenshtein.rb CHANGED
@@ -1,100 +1,148 @@
1
- begin
2
- require "levenshtein/levenshtein_c"
3
- rescue LoadError
4
- begin
5
- require "levenshtein_c"
6
- rescue LoadError
7
- $stderr.puts "WARNING: Couldn't find the fast C implementation of Levenshtein.distance_part2. Using the slow Ruby version instead."
8
- end
9
- end
1
+ # encoding: UTF-8
10
2
 
11
- # The Levenshtein distance is a metric for measuring the amount of difference
12
- # between two sequences (i.e., the so called edit distance). The Levenshtein
13
- # distance between two strings is given by the minimum number of operations
14
- # needed to transform one string into the other, where an operation is an
15
- # insertion, deletion, or substitution of a single character.
16
- #
17
- # More information about the Levenshtein distance algorithm:
18
- # http://en.wikipedia.org/wiki/Levenshtein_distance .
3
+ require "levenshtein/version"
19
4
 
20
5
  module Levenshtein
21
- # Returns the Levenshtein distance as a number bestween 0.0 and 1.0.
22
- # It's basically the Levenshtein distance divided by the length of the longest string.
23
-
24
- def self.normalized_distance(s1, s2, threshold=nil)
25
- s1, s2 = s2, s1 if s1.length > s2.length # s1 is the short one; s2 is the long one.
26
-
27
- if s2.empty?
28
- 0.0 # Since s1.length < s2.length, s1 must be empty as well.
6
+ # Returns the Levenshtein distance as a number between 0.0 and
7
+ # 1.0. It's basically the Levenshtein distance divided by the
8
+ # size of the longest sequence.
9
+
10
+ def self.normalized_distance(a1, a2, threshold=nil, options={})
11
+ size = [a1.size, a2.size].max
12
+
13
+ if a1.size == 0 and a2.size == 0
14
+ 0.0
15
+ elsif a1.size == 0
16
+ a2.size.to_f/size
17
+ elsif a2.size == 0
18
+ a1.size.to_f/size
29
19
  else
30
20
  if threshold
31
- if d = self.distance(s1, s2, (threshold*s2.length+1).to_i)
32
- d.to_f/s2.length
21
+ if d = self.distance(a1, a2, (threshold*size).to_i+1)
22
+ d.to_f/size
33
23
  else
34
24
  nil
35
25
  end
36
26
  else
37
- self.distance(s1, s2).to_f/s2.length
27
+ self.distance(a1, a2).to_f/size
38
28
  end
39
29
  end
40
30
  end
41
31
 
42
- # Returns the Levenshtein distance between two byte strings.
32
+ # Returns the Levenshtein distance between two sequences.
33
+ #
34
+ # The two sequences can be two strings, two arrays, or two other
35
+ # objects responding to :each. All sequences are handled by generic
36
+ # (fast) C code.
37
+ #
38
+ # All objects in the sequences should respond to :hash and :eql?.
43
39
 
44
- def self.distance(s1, s2, threshold=nil)
45
- s1, s2 = s2, s1 if s1.length > s2.length # s1 is the short one; s2 is the long one.
40
+ def self.distance(a1, a2, threshold=nil, options={})
41
+ a1, a2 = a1.scan(/./), a2.scan(/./) if String === a1 and String === a2
42
+ a1, a2 = Util.pool(a1, a2)
46
43
 
47
44
  # Handle some basic circumstances.
48
45
 
49
- return 0.0 if s1 == s2
50
- return s2.length if s1.empty?
51
- return nil if threshold and (s2.length-s1.length) >= threshold
52
- return nil if threshold and (s1.scan(/./) - s2.scan(/./)).length >= threshold
53
- return nil if threshold and (s2.scan(/./) - s1.scan(/./)).length >= threshold
46
+ return 0 if a1 == a2
47
+ return a2.size if a1.empty?
48
+ return a1.size if a2.empty?
54
49
 
55
- # Do the expensive calculation on a subset of the strings only, if possible.
50
+ if threshold
51
+ return nil if (a1.size-a2.size) >= threshold
52
+ return nil if (a2.size-a1.size) >= threshold
53
+ return nil if (a1-a2).size >= threshold
54
+ return nil if (a2-a1).size >= threshold
55
+ end
56
+
57
+ # Remove the common prefix and the common postfix.
56
58
 
57
- b = 0
58
- e1 = s1.length-1
59
- e2 = s2.length-1
59
+ l1 = a1.size
60
+ l2 = a2.size
60
61
 
61
- while s1[b, 1] == s2[b, 1]
62
- b += 1
62
+ offset = 0
63
+ no_more_optimizations = true
64
+
65
+ while offset < l1 and offset < l2 and a1[offset].equal?(a2[offset])
66
+ offset += 1
67
+
68
+ no_more_optimizations = false
63
69
  end
70
+
71
+ while offset < l1 and offset < l2 and a1[l1-1].equal?(a2[l2-1])
72
+ l1 -= 1
73
+ l2 -= 1
64
74
 
65
- while s1[e1, 1] == s2[e2, 1]
66
- e1 -= 1
67
- e2 -= 1
75
+ no_more_optimizations = false
68
76
  end
77
+
78
+ if no_more_optimizations
79
+ distance_fast_or_slow(a1, a2, threshold, options)
80
+ else
81
+ l1 -= offset
82
+ l2 -= offset
69
83
 
70
- distance_part2(s1[b..e1], s2[b..e2], threshold)
84
+ a1 = a1[offset, l1]
85
+ a2 = a2[offset, l2]
86
+
87
+ distance(a1, a2, threshold, options)
88
+ end
71
89
  end
72
90
 
73
- def self.distance_part2(s1, s2, threshold) # :nodoc:
74
- if respond_to?(:distance_part2_fast)
75
- distance_part2_fast(s1, s2, threshold) # Implemented in C.
91
+ def self.distance_fast_or_slow(a1, a2, threshold, options) # :nodoc:
92
+ if respond_to?(:distance_fast) and not options[:force_slow] # Use the C version when it is loaded, unless the caller forces the Ruby one.
93
+ distance_fast(a1, a2, threshold) # Implemented in C.
76
94
  else
77
- distance_part2_slow(s1, s2, threshold) # Implemented in Ruby.
95
+ distance_slow(a1, a2, threshold) # Implemented in Ruby.
78
96
  end
79
97
  end
80
98
 
81
- def self.distance_part2_slow(s1, s2, threshold) # :nodoc:
82
- row = (0..s1.length).to_a
99
+ def self.distance_slow(a1, a2, threshold) # :nodoc:
100
+ crow = (0..a1.size).to_a
83
101
 
84
- 1.upto(s2.length) do |y|
85
- prow = row
86
- row = [y]
102
+ 1.upto(a2.size) do |y|
103
+ prow = crow
104
+ crow = [y]
87
105
 
88
- 1.upto(s1.length) do |x|
89
- row[x] = [prow[x]+1, row[x-1]+1, prow[x-1]+(s1[x-1]==s2[y-1] ? 0 : 1)].min
106
+ 1.upto(a1.size) do |x|
107
+ crow[x] = [prow[x]+1, crow[x-1]+1, prow[x-1]+(a1[x-1].equal?(a2[y-1]) ? 0 : 1)].min
90
108
  end
91
109
 
92
- # Stop analysing this string as soon as the best possible result for this string is bigger than the best result so far.
93
- # (The minimum value in the next row will be equal to or greater than the minimum value in this row.)
110
+ # Stop analysing this sequence as soon as the best possible
111
+ # result for this sequence is bigger than the best result so far.
112
+ # (The minimum value in the next row will be equal to or greater
113
+ # than the minimum value in this row.)
114
+
115
+ return nil if threshold and crow.min >= threshold
116
+ end
117
+
118
+ crow[-1]
119
+ end
120
+
121
+ module Util # :nodoc:
122
+ def self.pool(*args)
123
+ # So we can compare pointers instead of objects (equal?() instead of ==()).
124
+
125
+ pool = {}
126
+
127
+ args.collect do |arg|
128
+ a = []
129
+
130
+ arg.each do |o|
131
+ a << pool[o] ||= o
132
+ end
94
133
 
95
- return nil if threshold and row.min >= threshold
134
+ a
135
+ end
96
136
  end
137
+ end
138
+ end
97
139
 
98
- row[-1]
140
+ begin
141
+ require "levenshtein/levenshtein_fast" # Compiled by RubyGems.
142
+ rescue LoadError
143
+ begin
144
+ require "levenshtein_fast" # Compiled by the build script.
145
+ rescue LoadError
146
+ $stderr.puts "WARNING: Couldn't find the fast C implementation of Levenshtein. Using the much slower Ruby version instead."
99
147
  end
100
148
  end
data/test/test.rb CHANGED
@@ -1,6 +1,39 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+
1
4
  require "test/unit"
2
5
  require "levenshtein"
3
6
 
7
+ module Levenshtein
8
+ class TestSequence
9
+ def initialize(o)
10
+ @sequence = o
11
+ end
12
+
13
+ def each
14
+ @sequence.length.times do |pos|
15
+ yield(@sequence[pos])
16
+ end
17
+ end
18
+ end
19
+
20
+ class TestElement
21
+ attr_reader :object
22
+
23
+ def initialize(o)
24
+ @object = o
25
+ end
26
+
27
+ def hash
28
+ @object.hash
29
+ end
30
+
31
+ def eql?(other)
32
+ @object.eql?(other.object)
33
+ end
34
+ end
35
+ end
36
+
4
37
  class TestLevenshtein < Test::Unit::TestCase
5
38
  def test_erik_veenstra
6
39
  assert_equal(7, Levenshtein.distance("erik", "veenstra"))
@@ -30,9 +63,11 @@ class TestLevenshtein < Test::Unit::TestCase
30
63
 
31
64
  def test_threshold
32
65
  assert_equal(3, Levenshtein.distance("foo", "foobar"))
66
+ assert_equal(3, Levenshtein.distance("foo", "foobar", 4))
33
67
  assert_equal(nil, Levenshtein.distance("foo", "foobar", 2))
34
68
 
35
69
  assert_in_delta(0.5, Levenshtein.normalized_distance("foo", "foobar"), 0.01)
70
+ assert_in_delta(0.5, Levenshtein.normalized_distance("foo", "foobar", 0.66), 0.01)
36
71
  assert_equal(nil, Levenshtein.normalized_distance("foo", "foobar", 0.30))
37
72
  end
38
73
 
@@ -40,51 +75,80 @@ class TestLevenshtein < Test::Unit::TestCase
40
75
  assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd"))
41
76
  assert_equal(3, Levenshtein.distance("ab123", "abxyz"))
42
77
  assert_equal(3, Levenshtein.distance("123cd", "xyzcd"))
78
+ assert_equal(5, Levenshtein.distance("123cd123", "123"))
43
79
 
44
80
  assert_in_delta(0.42, Levenshtein.normalized_distance("ab123cd", "abxyzcd"), 0.01)
45
81
  assert_in_delta(0.6, Levenshtein.normalized_distance("ab123", "abxyz"), 0.01)
46
82
  assert_in_delta(0.6, Levenshtein.normalized_distance("123cd", "xyzcd"), 0.01)
83
+ assert_in_delta(0.625, Levenshtein.normalized_distance("123cd123", "123"), 0.01)
84
+ end
85
+
86
+ def test_interface
87
+ seq1 = Levenshtein::TestSequence.new("erik".scan(/./).collect{|e| Levenshtein::TestElement.new(e)})
88
+ seq2 = Levenshtein::TestSequence.new("veenstra".scan(/./).collect{|e| Levenshtein::TestElement.new(e)})
89
+
90
+ assert_equal(7, Levenshtein.distance(seq1, seq2))
47
91
  end
48
92
  end
49
93
 
50
- class TestLevenshteinPart2Slow < Test::Unit::TestCase
94
+ class TestLevenshteinFast < Test::Unit::TestCase
51
95
  def test_erik_veenstra
52
- assert_equal(7, Levenshtein.distance_part2_slow("erik", "veenstra", nil))
96
+ assert_equal(7, Levenshtein.distance("erik", "veenstra", nil, :force_slow=>false))
97
+ assert_equal(7, Levenshtein.distance("veenstra", "erik", nil, :force_slow=>false))
53
98
  end
54
99
 
55
100
  def test_empty_string
56
- assert_equal(0, Levenshtein.distance_part2_slow("", "", nil))
57
- assert_equal(3, Levenshtein.distance_part2_slow("", "foo", nil))
101
+ assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>false))
102
+ assert_equal(3, Levenshtein.distance("", "foo", nil, :force_slow=>false))
103
+ assert_equal(3, Levenshtein.distance("foo", "", nil, :force_slow=>false))
58
104
  end
59
105
 
60
106
  def test_same_string
61
- assert_equal(0, Levenshtein.distance_part2_slow("", "", nil))
62
- assert_equal(0, Levenshtein.distance_part2_slow("foo", "foo", nil))
107
+ assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>false))
108
+ assert_equal(0, Levenshtein.distance("foo", "foo", nil, :force_slow=>false))
63
109
  end
64
110
 
65
111
  def test_threshold
66
- assert_equal(3, Levenshtein.distance_part2_slow("foo", "foobar", nil))
67
- assert_equal(nil, Levenshtein.distance_part2_slow("foo", "foobar", 2))
112
+ assert_equal(3, Levenshtein.distance("foo", "foobar", nil, :force_slow=>false))
113
+ assert_equal(3, Levenshtein.distance("foo", "foobar", 4, :force_slow=>false))
114
+ assert_equal(nil, Levenshtein.distance("foo", "foobar", 2, :force_slow=>false))
115
+ end
116
+
117
+ def test_same_head_and_or_tail
118
+ assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd", nil, :force_slow=>false))
119
+ assert_equal(3, Levenshtein.distance("ab123", "abxyz", nil, :force_slow=>false))
120
+ assert_equal(3, Levenshtein.distance("123cd", "xyzcd", nil, :force_slow=>false))
121
+ assert_equal(5, Levenshtein.distance("123cd123", "123", nil, :force_slow=>false))
68
122
  end
69
123
  end
70
124
 
71
- class TestLevenshteinPart2Fast < Test::Unit::TestCase
125
+ class TestLevenshteinSlow < Test::Unit::TestCase
72
126
  def test_erik_veenstra
73
- assert_equal(7, Levenshtein.distance_part2_fast("erik", "veenstra", nil))
127
+ assert_equal(7, Levenshtein.distance("erik", "veenstra", nil, :force_slow=>true))
128
+ assert_equal(7, Levenshtein.distance("veenstra", "erik", nil, :force_slow=>true))
74
129
  end
75
130
 
76
131
  def test_empty_string
77
- assert_equal(0, Levenshtein.distance_part2_fast("", "", nil))
78
- assert_equal(3, Levenshtein.distance_part2_fast("", "foo", nil))
132
+ assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>true))
133
+ assert_equal(3, Levenshtein.distance("", "foo", nil, :force_slow=>true))
134
+ assert_equal(3, Levenshtein.distance("foo", "", nil, :force_slow=>true))
79
135
  end
80
136
 
81
137
  def test_same_string
82
- assert_equal(0, Levenshtein.distance_part2_fast("", "", nil))
83
- assert_equal(0, Levenshtein.distance_part2_fast("foo", "foo", nil))
138
+ assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>true))
139
+ assert_equal(0, Levenshtein.distance("foo", "foo", nil, :force_slow=>true))
84
140
  end
85
141
 
86
142
  def test_threshold
87
- assert_equal(3, Levenshtein.distance_part2_fast("foo", "foobar", nil))
88
- assert_equal(nil, Levenshtein.distance_part2_fast("foo", "foobar", 2))
143
+ assert_equal(3, Levenshtein.distance("foo", "foobar", nil, :force_slow=>true))
144
+ assert_equal(3, Levenshtein.distance("foo", "foobar", 4, :force_slow=>true))
145
+ assert_equal(nil, Levenshtein.distance("foo", "foobar", 2, :force_slow=>true))
146
+ end
147
+
148
+ def test_same_head_and_or_tail
149
+ assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd", nil, :force_slow=>true))
150
+ assert_equal(3, Levenshtein.distance("ab123", "abxyz", nil, :force_slow=>true))
151
+ assert_equal(3, Levenshtein.distance("123cd", "xyzcd", nil, :force_slow=>true))
152
+ assert_equal(5, Levenshtein.distance("123cd123", "123", nil, :force_slow=>true))
89
153
  end
90
154
  end
metadata CHANGED
@@ -1,65 +1,64 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: levenshtein
3
- version: !ruby/object:Gem::Version
4
- version: 0.1.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.2
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - Erik Veenstra
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
-
12
- date: 2008-05-24 00:00:00 +02:00
13
- default_executable:
12
+ date: 2012-03-16 00:00:00.000000000 Z
14
13
  dependencies: []
15
-
16
14
  description: Calculates the Levenshtein distance between two byte strings.
17
15
  email: levenshtein@erikveen.dds.nl
18
16
  executables: []
19
-
20
- extensions:
17
+ extensions:
21
18
  - ext/levenshtein/extconf.rb
22
19
  extra_rdoc_files: []
23
-
24
- files:
20
+ files:
21
+ - lib/levenshtein/version.rb
25
22
  - lib/levenshtein.rb
26
- - ext/levenshtein
23
+ - ext/levenshtein/levenshtein.h
24
+ - ext/levenshtein/levenshtein_fast.c
27
25
  - ext/levenshtein/extconf.rb
28
- - ext/levenshtein/levenshtein_c.c
29
26
  - README
30
27
  - LICENSE
31
28
  - VERSION
32
- has_rdoc: true
29
+ - CHANGELOG
30
+ - test/test.rb
33
31
  homepage: http://www.erikveen.dds.nl/levenshtein/index.html
32
+ licenses: []
34
33
  post_install_message:
35
- rdoc_options:
34
+ rdoc_options:
36
35
  - README
37
36
  - LICENSE
38
37
  - VERSION
38
+ - CHANGELOG
39
39
  - --title
40
- - levenshtein (0.1.0)
40
+ - levenshtein (0.2.2)
41
41
  - --main
42
42
  - README
43
- require_paths:
43
+ require_paths:
44
44
  - lib
45
- required_ruby_version: !ruby/object:Gem::Requirement
46
- requirements:
47
- - - ">="
48
- - !ruby/object:Gem::Version
49
- version: "0"
50
- version:
51
- required_rubygems_version: !ruby/object:Gem::Requirement
52
- requirements:
53
- - - ">="
54
- - !ruby/object:Gem::Version
55
- version: "0"
56
- version:
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
57
  requirements: []
58
-
59
58
  rubyforge_project: levenshtein
60
- rubygems_version: 1.0.1
59
+ rubygems_version: 1.8.18
61
60
  signing_key:
62
- specification_version: 2
61
+ specification_version: 3
63
62
  summary: Calculates the Levenshtein distance between two byte strings.
64
- test_files:
63
+ test_files:
65
64
  - test/test.rb