levenshtein 0.1.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +34 -0
- data/README +15 -8
- data/VERSION +1 -1
- data/ext/levenshtein/extconf.rb +6 -1
- data/ext/levenshtein/levenshtein.h +13 -0
- data/ext/levenshtein/{levenshtein_c.c → levenshtein_fast.c} +38 -34
- data/lib/levenshtein/version.rb +5 -0
- data/lib/levenshtein.rb +109 -61
- data/test/test.rb +80 -16
- metadata +33 -34
data/CHANGELOG
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
0.2.2 (16-03-2012)
|
2
|
+
|
3
|
+
* Simplified code.
|
4
|
+
|
5
|
+
0.2.1 (11-03-2012)
|
6
|
+
|
7
|
+
* Better memory handling.
|
8
|
+
|
9
|
+
* Little speed improvements.
|
10
|
+
|
11
|
+
* Ruby 1.9 compatible?
|
12
|
+
|
13
|
+
0.2.0 (11-07-2009)
|
14
|
+
|
15
|
+
* Return 0 instead of 0.0 in case of empty strings.
|
16
|
+
|
17
|
+
* Added specific support for arrays.
|
18
|
+
|
19
|
+
* Added specific support for arrays of strings.
|
20
|
+
|
21
|
+
* Added generic support for all (?) kind of sequences.
|
22
|
+
|
23
|
+
* Moved a lot of code to the C world.
|
24
|
+
|
25
|
+
0.1.1 (06-10-2008)
|
26
|
+
|
27
|
+
* If one of the strings was both the beginning and the end of the
|
28
|
+
other string, it would be stripped from both ends. Example:
|
29
|
+
Levenshtein.distance("abracadabra", "abra") resulted in 3
|
30
|
+
instead of 7. It's fixed now.
|
31
|
+
|
32
|
+
0.1.0 (24-05-2008)
|
33
|
+
|
34
|
+
* First release.
|
data/README
CHANGED
@@ -1,8 +1,15 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
1
|
+
The Levenshtein distance is a metric for measuring the amount
|
2
|
+
of difference between two sequences (i.e., the so called edit
|
3
|
+
distance). The Levenshtein distance between two sequences is
|
4
|
+
given by the minimum number of operations needed to transform
|
5
|
+
one sequence into the other, where an operation is an
|
6
|
+
insertion, deletion, or substitution of a single element.
|
7
|
+
|
8
|
+
The two sequences can be two strings, two arrays, or two other
|
9
|
+
objects responding to :each. All sequences are handled by generic
|
10
|
+
(fast) C code.
|
11
|
+
|
12
|
+
All objects in the sequences should respond to :hash and :eql?.
|
13
|
+
|
14
|
+
More information about the Levenshtein distance algorithm:
|
15
|
+
http://en.wikipedia.org/wiki/Levenshtein_distance .
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.2
|
data/ext/levenshtein/extconf.rb
CHANGED
@@ -2,4 +2,9 @@ require "mkmf"
|
|
2
2
|
|
3
3
|
dir_config("levenshtein")
|
4
4
|
|
5
|
-
|
5
|
+
have_library("levenshtein_array")
|
6
|
+
have_library("levenshtein_array_of_strings")
|
7
|
+
have_library("levenshtein_generic")
|
8
|
+
have_library("levenshtein_string")
|
9
|
+
|
10
|
+
create_makefile("levenshtein/levenshtein_fast")
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#ifdef RARRAY_PTR
|
2
|
+
#else
|
3
|
+
#define RARRAY_PTR(o) (RARRAY(o)->ptr)
|
4
|
+
#define RARRAY_LEN(o) (RARRAY(o)->len)
|
5
|
+
#endif
|
6
|
+
|
7
|
+
#ifdef RSTRING_PTR
|
8
|
+
#else
|
9
|
+
#define RSTRING_PTR(o) (RSTRING(o)->ptr)
|
10
|
+
#define RSTRING_LEN(o) (RSTRING(o)->len)
|
11
|
+
#endif
|
12
|
+
|
13
|
+
VALUE mLevenshtein;
|
@@ -1,25 +1,27 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
+
#include "levenshtein.h"
|
2
3
|
|
3
|
-
|
4
|
-
VALUE
|
4
|
+
VALUE levenshtein_distance_fast(VALUE self, VALUE rb_o1, VALUE rb_o2, VALUE rb_threshold) {
|
5
|
+
VALUE *p1, *p2;
|
6
|
+
long l1, l2;
|
7
|
+
long col, row;
|
5
8
|
int threshold;
|
6
|
-
int
|
7
|
-
char *s1, *s2, *s3;
|
8
|
-
int *prev_row, *curr_row;
|
9
|
-
int col, row;
|
9
|
+
int *prev_row, *curr_row, *temp_row;
|
10
10
|
int curr_row_min, result;
|
11
|
+
int value1, value2;
|
11
12
|
|
12
|
-
/*
|
13
|
+
/* Be sure that all equivalent objects in rb_o1 and rb_o2 (a.eql?(b) == true) are taken from a pool (a.equal?(b) == true). */
|
14
|
+
/* This is done in levenshtein.rb by means of Util.pool. */
|
13
15
|
|
14
|
-
|
15
|
-
s1 = RSTRING(rb_s1)->ptr;
|
16
|
-
l1 = RSTRING(rb_s1)->len;
|
16
|
+
/* Get the sizes of both arrays. */
|
17
17
|
|
18
|
-
|
18
|
+
l1 = RARRAY_LEN(rb_o1);
|
19
|
+
l2 = RARRAY_LEN(rb_o2);
|
19
20
|
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
/* Get the pointers of both arrays. */
|
22
|
+
|
23
|
+
p1 = RARRAY_PTR(rb_o1);
|
24
|
+
p2 = RARRAY_PTR(rb_o2);
|
23
25
|
|
24
26
|
/* Convert Ruby's threshold to C's threshold. */
|
25
27
|
|
@@ -29,7 +31,7 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
|
|
29
31
|
threshold = -1;
|
30
32
|
}
|
31
33
|
|
32
|
-
/* The Levenshtein
|
34
|
+
/* The Levenshtein algorithm itself. */
|
33
35
|
|
34
36
|
/* s1= */
|
35
37
|
/* ERIK */
|
@@ -43,15 +45,11 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
|
|
43
45
|
/* T 65555 */
|
44
46
|
/* R 76566 */
|
45
47
|
/* A 87667 */
|
46
|
-
|
48
|
+
|
47
49
|
/* Allocate memory for both rows */
|
48
50
|
|
49
|
-
prev_row = ALLOC_N(int, l1+1);
|
50
|
-
curr_row = ALLOC_N(int, l1+1);
|
51
|
-
|
52
|
-
if ((prev_row == NULL) || (curr_row == NULL)) {
|
53
|
-
rb_raise(rb_eNoMemError, "out of memory");
|
54
|
-
}
|
51
|
+
prev_row = (int*) ALLOC_N(int, (l1+1));
|
52
|
+
curr_row = (int*) ALLOC_N(int, (l1+1));
|
55
53
|
|
56
54
|
/* Initialize the current row. */
|
57
55
|
|
@@ -62,7 +60,9 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
|
|
62
60
|
for (row=1; row<=l2; row++) {
|
63
61
|
/* Swap the row pointers: the current row becomes the previous row. */
|
64
62
|
|
65
|
-
|
63
|
+
temp_row = prev_row;
|
64
|
+
prev_row = curr_row;
|
65
|
+
curr_row = temp_row;
|
66
66
|
|
67
67
|
/* Calculate the values of the current row. */
|
68
68
|
|
@@ -70,27 +70,31 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
|
|
70
70
|
curr_row_min = row;
|
71
71
|
|
72
72
|
for (col=1; col<=l1; col++) {
|
73
|
-
/* Equal (cost=0) or
|
73
|
+
/* Equal (cost=0) or substitution (cost=1). */
|
74
74
|
|
75
|
-
|
75
|
+
value1 = prev_row[col-1] + ((p1[col-1] == p2[row-1]) ? 0 : 1);
|
76
76
|
|
77
77
|
/* Insertion if it's cheaper than substitution. */
|
78
78
|
|
79
|
-
|
80
|
-
|
79
|
+
value2 = prev_row[col]+1;
|
80
|
+
if (value2 < value1) {
|
81
|
+
value1 = value2;
|
81
82
|
}
|
82
83
|
|
83
84
|
/* Deletion if it's cheaper than substitution. */
|
84
85
|
|
85
|
-
|
86
|
-
|
86
|
+
value2 = curr_row[col-1]+1;
|
87
|
+
if (value2 < value1) {
|
88
|
+
value1 = value2;
|
87
89
|
}
|
88
90
|
|
89
91
|
/* Keep track of the minimum value on this row. */
|
90
92
|
|
91
|
-
if (
|
92
|
-
curr_row_min =
|
93
|
+
if (value1 < curr_row_min) {
|
94
|
+
curr_row_min = value1;
|
93
95
|
}
|
96
|
+
|
97
|
+
curr_row[col] = value1;
|
94
98
|
}
|
95
99
|
|
96
100
|
/* Return nil as soon as we exceed the threshold. */
|
@@ -115,8 +119,8 @@ static VALUE levenshtein_distance_part2(VALUE self, VALUE rb_s1, VALUE rb_s2, VA
|
|
115
119
|
return INT2FIX(result);
|
116
120
|
}
|
117
121
|
|
118
|
-
void
|
119
|
-
|
122
|
+
void Init_levenshtein_fast() {
|
123
|
+
mLevenshtein = rb_const_get(rb_mKernel, rb_intern("Levenshtein"));
|
120
124
|
|
121
|
-
rb_define_singleton_method(mLevenshtein, "
|
125
|
+
rb_define_singleton_method(mLevenshtein, "distance_fast" , levenshtein_distance_fast, 3);
|
122
126
|
}
|
data/lib/levenshtein.rb
CHANGED
@@ -1,100 +1,148 @@
|
|
1
|
-
|
2
|
-
require "levenshtein/levenshtein_c"
|
3
|
-
rescue LoadError
|
4
|
-
begin
|
5
|
-
require "levenshtein_c"
|
6
|
-
rescue LoadError
|
7
|
-
$stderr.puts "WARNING: Couldn't find the fast C implementation of Levenshtein.distance_part2. Using the slow Ruby version instead."
|
8
|
-
end
|
9
|
-
end
|
1
|
+
# encoding: UTF-8
|
10
2
|
|
11
|
-
|
12
|
-
# between two sequences (i.e., the so called edit distance). The Levenshtein
|
13
|
-
# distance between two strings is given by the minimum number of operations
|
14
|
-
# needed to transform one string into the other, where an operation is an
|
15
|
-
# insertion, deletion, or substitution of a single character.
|
16
|
-
#
|
17
|
-
# More information about the Levenshtein distance algorithm:
|
18
|
-
# http://en.wikipedia.org/wiki/Levenshtein_distance .
|
3
|
+
require "levenshtein/version"
|
19
4
|
|
20
5
|
module Levenshtein
|
21
|
-
# Returns the Levenshtein distance as a number
|
22
|
-
# It's basically the Levenshtein distance divided by the
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
6
|
+
# Returns the Levenshtein distance as a number between 0.0 and
|
7
|
+
# 1.0. It's basically the Levenshtein distance divided by the
|
8
|
+
# size of the longest sequence.
|
9
|
+
|
10
|
+
def self.normalized_distance(a1, a2, threshold=nil, options={})
|
11
|
+
size = [a1.size, a2.size].max
|
12
|
+
|
13
|
+
if a1.size == 0 and a2.size == 0
|
14
|
+
0.0
|
15
|
+
elsif a1.size == 0
|
16
|
+
a2.size.to_f/size
|
17
|
+
elsif a2.size == 0
|
18
|
+
a1.size.to_f/size
|
29
19
|
else
|
30
20
|
if threshold
|
31
|
-
if d = self.distance(
|
32
|
-
d.to_f/
|
21
|
+
if d = self.distance(a1, a2, (threshold*size).to_i+1)
|
22
|
+
d.to_f/size
|
33
23
|
else
|
34
24
|
nil
|
35
25
|
end
|
36
26
|
else
|
37
|
-
self.distance(
|
27
|
+
self.distance(a1, a2).to_f/size
|
38
28
|
end
|
39
29
|
end
|
40
30
|
end
|
41
31
|
|
42
|
-
# Returns the Levenshtein distance between two
|
32
|
+
# Returns the Levenshtein distance between two sequences.
|
33
|
+
#
|
34
|
+
# The two sequences can be two strings, two arrays, or two other
|
35
|
+
# objects responding to :each. All sequences are handled by generic
|
36
|
+
# (fast) C code.
|
37
|
+
#
|
38
|
+
# All objects in the sequences should respond to :hash and :eql?.
|
43
39
|
|
44
|
-
def self.distance(
|
45
|
-
|
40
|
+
def self.distance(a1, a2, threshold=nil, options={})
|
41
|
+
a1, a2 = a1.scan(/./), a2.scan(/./) if String === a1 and String === a2
|
42
|
+
a1, a2 = Util.pool(a1, a2)
|
46
43
|
|
47
44
|
# Handle some basic circumstances.
|
48
45
|
|
49
|
-
return 0
|
50
|
-
return
|
51
|
-
return
|
52
|
-
return nil if threshold and (s1.scan(/./) - s2.scan(/./)).length >= threshold
|
53
|
-
return nil if threshold and (s2.scan(/./) - s1.scan(/./)).length >= threshold
|
46
|
+
return 0 if a1 == a2
|
47
|
+
return a2.size if a1.empty?
|
48
|
+
return a1.size if a2.empty?
|
54
49
|
|
55
|
-
|
50
|
+
if threshold
|
51
|
+
return nil if (a1.size-a2.size) >= threshold
|
52
|
+
return nil if (a2.size-a1.size) >= threshold
|
53
|
+
return nil if (a1-a2).size >= threshold
|
54
|
+
return nil if (a2-a1).size >= threshold
|
55
|
+
end
|
56
|
+
|
57
|
+
# Remove the common prefix and the common postfix.
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
-
e2 = s2.length-1
|
59
|
+
l1 = a1.size
|
60
|
+
l2 = a2.size
|
60
61
|
|
61
|
-
|
62
|
-
|
62
|
+
offset = 0
|
63
|
+
no_more_optimizations = true
|
64
|
+
|
65
|
+
while offset < l1 and offset < l2 and a1[offset].equal?(a2[offset])
|
66
|
+
offset += 1
|
67
|
+
|
68
|
+
no_more_optimizations = false
|
63
69
|
end
|
70
|
+
|
71
|
+
while offset < l1 and offset < l2 and a1[l1-1].equal?(a2[l2-1])
|
72
|
+
l1 -= 1
|
73
|
+
l2 -= 1
|
64
74
|
|
65
|
-
|
66
|
-
e1 -= 1
|
67
|
-
e2 -= 1
|
75
|
+
no_more_optimizations = false
|
68
76
|
end
|
77
|
+
|
78
|
+
if no_more_optimizations
|
79
|
+
distance_fast_or_slow(a1, a2, threshold, options)
|
80
|
+
else
|
81
|
+
l1 -= offset
|
82
|
+
l2 -= offset
|
69
83
|
|
70
|
-
|
84
|
+
a1 = a1[offset, l1]
|
85
|
+
a2 = a2[offset, l2]
|
86
|
+
|
87
|
+
distance(a1, a2, threshold, options)
|
88
|
+
end
|
71
89
|
end
|
72
90
|
|
73
|
-
def self.
|
74
|
-
if respond_to?(:
|
75
|
-
|
91
|
+
def self.distance_fast_or_slow(a1, a2, threshold, options) # :nodoc:
|
92
|
+
if respond_to?(:distance_fast) and options[:force_slow]
|
93
|
+
distance_fast(a1, a2, threshold) # Implemented in C.
|
76
94
|
else
|
77
|
-
|
95
|
+
distance_slow(a1, a2, threshold) # Implemented in Ruby.
|
78
96
|
end
|
79
97
|
end
|
80
98
|
|
81
|
-
def self.
|
82
|
-
|
99
|
+
def self.distance_slow(a1, a2, threshold) # :nodoc:
|
100
|
+
crow = (0..a1.size).to_a
|
83
101
|
|
84
|
-
1.upto(
|
85
|
-
prow =
|
86
|
-
|
102
|
+
1.upto(a2.size) do |y|
|
103
|
+
prow = crow
|
104
|
+
crow = [y]
|
87
105
|
|
88
|
-
1.upto(
|
89
|
-
|
106
|
+
1.upto(a1.size) do |x|
|
107
|
+
crow[x] = [prow[x]+1, crow[x-1]+1, prow[x-1]+(a1[x-1].equal?(a2[y-1]) ? 0 : 1)].min
|
90
108
|
end
|
91
109
|
|
92
|
-
# Stop analysing this
|
93
|
-
#
|
110
|
+
# Stop analysing this sequence as soon as the best possible
|
111
|
+
# result for this sequence is bigger than the best result so far.
|
112
|
+
# (The minimum value in the next row will be equal to or greater
|
113
|
+
# than the minimum value in this row.)
|
114
|
+
|
115
|
+
return nil if threshold and crow.min >= threshold
|
116
|
+
end
|
117
|
+
|
118
|
+
crow[-1]
|
119
|
+
end
|
120
|
+
|
121
|
+
module Util # :nodoc:
|
122
|
+
def self.pool(*args)
|
123
|
+
# So we can compare pointers instead of objects (equal?() instead of ==()).
|
124
|
+
|
125
|
+
pool = {}
|
126
|
+
|
127
|
+
args.collect do |arg|
|
128
|
+
a = []
|
129
|
+
|
130
|
+
arg.each do |o|
|
131
|
+
a << pool[o] ||= o
|
132
|
+
end
|
94
133
|
|
95
|
-
|
134
|
+
a
|
135
|
+
end
|
96
136
|
end
|
137
|
+
end
|
138
|
+
end
|
97
139
|
|
98
|
-
|
140
|
+
begin
|
141
|
+
require "levenshtein/levenshtein_fast" # Compiled by RubyGems.
|
142
|
+
rescue LoadError
|
143
|
+
begin
|
144
|
+
require "levenshtein_fast" # Compiled by the build script.
|
145
|
+
rescue LoadError
|
146
|
+
$stderr.puts "WARNING: Couldn't find the fast C implementation of Levenshtein. Using the much slower Ruby version instead."
|
99
147
|
end
|
100
148
|
end
|
data/test/test.rb
CHANGED
@@ -1,6 +1,39 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
1
4
|
require "test/unit"
|
2
5
|
require "levenshtein"
|
3
6
|
|
7
|
+
module Levenshtein
|
8
|
+
class TestSequence
|
9
|
+
def initialize(o)
|
10
|
+
@sequence = o
|
11
|
+
end
|
12
|
+
|
13
|
+
def each
|
14
|
+
@sequence.length.times do |pos|
|
15
|
+
yield(@sequence[pos])
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class TestElement
|
21
|
+
attr_reader :object
|
22
|
+
|
23
|
+
def initialize(o)
|
24
|
+
@object = o
|
25
|
+
end
|
26
|
+
|
27
|
+
def hash
|
28
|
+
@object.hash
|
29
|
+
end
|
30
|
+
|
31
|
+
def eql?(other)
|
32
|
+
@object.eql?(other.object)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
4
37
|
class TestLevenshtein < Test::Unit::TestCase
|
5
38
|
def test_erik_veenstra
|
6
39
|
assert_equal(7, Levenshtein.distance("erik", "veenstra"))
|
@@ -30,9 +63,11 @@ class TestLevenshtein < Test::Unit::TestCase
|
|
30
63
|
|
31
64
|
def test_threshold
|
32
65
|
assert_equal(3, Levenshtein.distance("foo", "foobar"))
|
66
|
+
assert_equal(3, Levenshtein.distance("foo", "foobar", 4))
|
33
67
|
assert_equal(nil, Levenshtein.distance("foo", "foobar", 2))
|
34
68
|
|
35
69
|
assert_in_delta(0.5, Levenshtein.normalized_distance("foo", "foobar"), 0.01)
|
70
|
+
assert_in_delta(0.5, Levenshtein.normalized_distance("foo", "foobar", 0.66), 0.01)
|
36
71
|
assert_equal(nil, Levenshtein.normalized_distance("foo", "foobar", 0.30))
|
37
72
|
end
|
38
73
|
|
@@ -40,51 +75,80 @@ class TestLevenshtein < Test::Unit::TestCase
|
|
40
75
|
assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd"))
|
41
76
|
assert_equal(3, Levenshtein.distance("ab123", "abxyz"))
|
42
77
|
assert_equal(3, Levenshtein.distance("123cd", "xyzcd"))
|
78
|
+
assert_equal(5, Levenshtein.distance("123cd123", "123"))
|
43
79
|
|
44
80
|
assert_in_delta(0.42, Levenshtein.normalized_distance("ab123cd", "abxyzcd"), 0.01)
|
45
81
|
assert_in_delta(0.6, Levenshtein.normalized_distance("ab123", "abxyz"), 0.01)
|
46
82
|
assert_in_delta(0.6, Levenshtein.normalized_distance("123cd", "xyzcd"), 0.01)
|
83
|
+
assert_in_delta(0.625, Levenshtein.normalized_distance("123cd123", "123"), 0.01)
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_interface
|
87
|
+
seq1 = Levenshtein::TestSequence.new("erik".scan(/./).collect{|e| Levenshtein::TestElement.new(e)})
|
88
|
+
seq2 = Levenshtein::TestSequence.new("veenstra".scan(/./).collect{|e| Levenshtein::TestElement.new(e)})
|
89
|
+
|
90
|
+
assert_equal(7, Levenshtein.distance(seq1, seq2))
|
47
91
|
end
|
48
92
|
end
|
49
93
|
|
50
|
-
class
|
94
|
+
class TestLevenshteinFast < Test::Unit::TestCase
|
51
95
|
def test_erik_veenstra
|
52
|
-
assert_equal(7, Levenshtein.
|
96
|
+
assert_equal(7, Levenshtein.distance("erik", "veenstra", nil, :force_slow=>false))
|
97
|
+
assert_equal(7, Levenshtein.distance("veenstra", "erik", nil, :force_slow=>false))
|
53
98
|
end
|
54
99
|
|
55
100
|
def test_empty_string
|
56
|
-
assert_equal(0, Levenshtein.
|
57
|
-
assert_equal(3, Levenshtein.
|
101
|
+
assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>false))
|
102
|
+
assert_equal(3, Levenshtein.distance("", "foo", nil, :force_slow=>false))
|
103
|
+
assert_equal(3, Levenshtein.distance("foo", "", nil, :force_slow=>false))
|
58
104
|
end
|
59
105
|
|
60
106
|
def test_same_string
|
61
|
-
assert_equal(0, Levenshtein.
|
62
|
-
assert_equal(0, Levenshtein.
|
107
|
+
assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>false))
|
108
|
+
assert_equal(0, Levenshtein.distance("foo", "foo", nil, :force_slow=>false))
|
63
109
|
end
|
64
110
|
|
65
111
|
def test_threshold
|
66
|
-
assert_equal(3, Levenshtein.
|
67
|
-
assert_equal(
|
112
|
+
assert_equal(3, Levenshtein.distance("foo", "foobar", nil, :force_slow=>false))
|
113
|
+
assert_equal(3, Levenshtein.distance("foo", "foobar", 4, :force_slow=>false))
|
114
|
+
assert_equal(nil, Levenshtein.distance("foo", "foobar", 2, :force_slow=>false))
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_same_head_and_or_tail
|
118
|
+
assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd", nil, :force_slow=>false))
|
119
|
+
assert_equal(3, Levenshtein.distance("ab123", "abxyz", nil, :force_slow=>false))
|
120
|
+
assert_equal(3, Levenshtein.distance("123cd", "xyzcd", nil, :force_slow=>false))
|
121
|
+
assert_equal(5, Levenshtein.distance("123cd123", "123", nil, :force_slow=>false))
|
68
122
|
end
|
69
123
|
end
|
70
124
|
|
71
|
-
class
|
125
|
+
class TestLevenshteinSlow < Test::Unit::TestCase
|
72
126
|
def test_erik_veenstra
|
73
|
-
assert_equal(7, Levenshtein.
|
127
|
+
assert_equal(7, Levenshtein.distance("erik", "veenstra", nil, :force_slow=>true))
|
128
|
+
assert_equal(7, Levenshtein.distance("veenstra", "erik", nil, :force_slow=>true))
|
74
129
|
end
|
75
130
|
|
76
131
|
def test_empty_string
|
77
|
-
assert_equal(0, Levenshtein.
|
78
|
-
assert_equal(3, Levenshtein.
|
132
|
+
assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>true))
|
133
|
+
assert_equal(3, Levenshtein.distance("", "foo", nil, :force_slow=>true))
|
134
|
+
assert_equal(3, Levenshtein.distance("foo", "", nil, :force_slow=>true))
|
79
135
|
end
|
80
136
|
|
81
137
|
def test_same_string
|
82
|
-
assert_equal(0, Levenshtein.
|
83
|
-
assert_equal(0, Levenshtein.
|
138
|
+
assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>true))
|
139
|
+
assert_equal(0, Levenshtein.distance("foo", "foo", nil, :force_slow=>true))
|
84
140
|
end
|
85
141
|
|
86
142
|
def test_threshold
|
87
|
-
assert_equal(3, Levenshtein.
|
88
|
-
assert_equal(
|
143
|
+
assert_equal(3, Levenshtein.distance("foo", "foobar", nil, :force_slow=>true))
|
144
|
+
assert_equal(3, Levenshtein.distance("foo", "foobar", 4, :force_slow=>true))
|
145
|
+
assert_equal(nil, Levenshtein.distance("foo", "foobar", 2, :force_slow=>true))
|
146
|
+
end
|
147
|
+
|
148
|
+
def test_same_head_and_or_tail
|
149
|
+
assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd", nil, :force_slow=>true))
|
150
|
+
assert_equal(3, Levenshtein.distance("ab123", "abxyz", nil, :force_slow=>true))
|
151
|
+
assert_equal(3, Levenshtein.distance("123cd", "xyzcd", nil, :force_slow=>true))
|
152
|
+
assert_equal(5, Levenshtein.distance("123cd123", "123", nil, :force_slow=>true))
|
89
153
|
end
|
90
154
|
end
|
metadata
CHANGED
@@ -1,65 +1,64 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: levenshtein
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.2
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
+
authors:
|
7
8
|
- Erik Veenstra
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
|
12
|
-
date: 2008-05-24 00:00:00 +02:00
|
13
|
-
default_executable:
|
12
|
+
date: 2012-03-16 00:00:00.000000000 Z
|
14
13
|
dependencies: []
|
15
|
-
|
16
14
|
description: Calculates the Levenshtein distance between two byte strings.
|
17
15
|
email: levenshtein@erikveen.dds.nl
|
18
16
|
executables: []
|
19
|
-
|
20
|
-
extensions:
|
17
|
+
extensions:
|
21
18
|
- ext/levenshtein/extconf.rb
|
22
19
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
|
20
|
+
files:
|
21
|
+
- lib/levenshtein/version.rb
|
25
22
|
- lib/levenshtein.rb
|
26
|
-
- ext/levenshtein
|
23
|
+
- ext/levenshtein/levenshtein.h
|
24
|
+
- ext/levenshtein/levenshtein_fast.c
|
27
25
|
- ext/levenshtein/extconf.rb
|
28
|
-
- ext/levenshtein/levenshtein_c.c
|
29
26
|
- README
|
30
27
|
- LICENSE
|
31
28
|
- VERSION
|
32
|
-
|
29
|
+
- CHANGELOG
|
30
|
+
- test/test.rb
|
33
31
|
homepage: http://www.erikveen.dds.nl/levenshtein/index.html
|
32
|
+
licenses: []
|
34
33
|
post_install_message:
|
35
|
-
rdoc_options:
|
34
|
+
rdoc_options:
|
36
35
|
- README
|
37
36
|
- LICENSE
|
38
37
|
- VERSION
|
38
|
+
- CHANGELOG
|
39
39
|
- --title
|
40
|
-
- levenshtein (0.
|
40
|
+
- levenshtein (0.2.2)
|
41
41
|
- --main
|
42
42
|
- README
|
43
|
-
require_paths:
|
43
|
+
require_paths:
|
44
44
|
- lib
|
45
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
57
|
requirements: []
|
58
|
-
|
59
58
|
rubyforge_project: levenshtein
|
60
|
-
rubygems_version: 1.
|
59
|
+
rubygems_version: 1.8.18
|
61
60
|
signing_key:
|
62
|
-
specification_version:
|
61
|
+
specification_version: 3
|
63
62
|
summary: Calculates the Levenshtein distance between two byte strings.
|
64
|
-
test_files:
|
63
|
+
test_files:
|
65
64
|
- test/test.rb
|