aspell_edit_dist 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ .idea/*
2
+ Makefile
3
+ *.o
4
+ *.so
5
+ mkmf.log
data/README ADDED
@@ -0,0 +1 @@
1
+ This is a very simple gem whose purpose is to expose limit_edit_distance from Aspell.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
# Packaging tasks come from Jeweler when it is installed; when it is not,
# an installation hint is printed and no tasks are defined.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name     = "aspell_edit_dist"
    spec.summary  = "Gem that exposes limit_edit_distance function from Aspell."
    spec.email    = "adam@pohorecki.pl"
    spec.homepage = "http://github.com/psyho/aspell_edit_dist"
    spec.authors  = ["Adam Pohorecki"]
  end
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,127 @@
1
+ #include "aspell_edit_dist.h"
2
+ #include "weights.hpp"
3
+ #include "leditdist.hpp"
4
+
5
+ // Forward declarations
6
+ void Init_edit_distance_weights();
7
+ void Init_limit_edit_distance();
8
+
9
+ extern "C" void Init_aspell_edit_dist() {
10
+ Init_edit_distance_weights();
11
+ Init_limit_edit_distance();
12
+ }
13
+
14
+ static aspeller::EditDistanceWeights* get_weights(VALUE weights) {
15
+ aspeller::EditDistanceWeights* result;
16
+ Data_Get_Struct(weights, aspeller::EditDistanceWeights, result);
17
+ return result;
18
+ }
19
+
20
+ static void weights_free(aspeller::EditDistanceWeights* obj) {
21
+ if (obj) {
22
+ delete obj;
23
+ }
24
+ }
25
+
26
+ static VALUE weights_init(VALUE self) {
27
+ aspeller::EditDistanceWeights * new_obj = new aspeller::EditDistanceWeights();
28
+ return Data_Wrap_Struct(cEditDistanceWeights, 0, weights_free, new_obj);
29
+ }
30
+
31
+ static VALUE weights_del1(VALUE self) {
32
+ return INT2FIX(get_weights(self)->del1);
33
+ }
34
+
35
+ static VALUE weights_set_del1(VALUE self, VALUE val) {
36
+ get_weights(self)->del1 = NUM2INT(val);
37
+ return val;
38
+ }
39
+
40
+ static VALUE weights_del2(VALUE self) {
41
+ return INT2FIX(get_weights(self)->del2);
42
+ }
43
+
44
+ static VALUE weights_set_del2(VALUE self, VALUE val) {
45
+ get_weights(self)->del2 = NUM2INT(val);
46
+ return val;
47
+ }
48
+
49
+ static VALUE weights_swap(VALUE self) {
50
+ return INT2FIX(get_weights(self)->swap);
51
+ }
52
+
53
+ static VALUE weights_set_swap(VALUE self, VALUE val) {
54
+ get_weights(self)->swap = NUM2INT(val);
55
+ return val;
56
+ }
57
+
58
+ static VALUE weights_sub(VALUE self) {
59
+ return INT2FIX(get_weights(self)->sub);
60
+ }
61
+
62
+ static VALUE weights_set_sub(VALUE self, VALUE val) {
63
+ get_weights(self)->sub = NUM2INT(val);
64
+ return val;
65
+ }
66
+
67
+ static VALUE weights_similar(VALUE self) {
68
+ return INT2FIX(get_weights(self)->similar);
69
+ }
70
+
71
+ static VALUE weights_set_similar(VALUE self, VALUE val) {
72
+ get_weights(self)->similar = NUM2INT(val);
73
+ return val;
74
+ }
75
+
76
+ static VALUE weights_min(VALUE self) {
77
+ return INT2FIX(get_weights(self)->min);
78
+ }
79
+
80
+ static VALUE weights_set_min(VALUE self, VALUE val) {
81
+ get_weights(self)->min = NUM2INT(val);
82
+ return val;
83
+ }
84
+
85
+ static VALUE weights_max(VALUE self) {
86
+ return INT2FIX(get_weights(self)->max);
87
+ }
88
+
89
+ static VALUE weights_set_max(VALUE self, VALUE val) {
90
+ get_weights(self)->max = NUM2INT(val);
91
+ return val;
92
+ }
93
+
94
+ typedef VALUE (*rb_method)(...);
95
+
96
+ void Init_edit_distance_weights() {
97
+ mAspell = rb_define_module("Aspeller");
98
+
99
+ cEditDistanceWeights = rb_define_class_under(mAspell, "EditDistanceWeights", rb_cObject);
100
+
101
+ rb_define_method(cEditDistanceWeights, "initialize", (rb_method)weights_init, 0);
102
+ rb_define_singleton_method(cEditDistanceWeights, "new", (rb_method)weights_init, 0);
103
+
104
+ rb_define_method(cEditDistanceWeights, "del1", (rb_method)weights_del1, 0);
105
+ rb_define_method(cEditDistanceWeights, "del1=", (rb_method)weights_set_del1, 1);
106
+ rb_define_method(cEditDistanceWeights, "del2", (rb_method)weights_del2, 0);
107
+ rb_define_method(cEditDistanceWeights, "del2=", (rb_method)weights_set_del2, 1);
108
+ rb_define_method(cEditDistanceWeights, "swap", (rb_method)weights_swap, 0);
109
+ rb_define_method(cEditDistanceWeights, "swap=", (rb_method)weights_set_swap, 1);
110
+ rb_define_method(cEditDistanceWeights, "sub", (rb_method)weights_sub, 0);
111
+ rb_define_method(cEditDistanceWeights, "sub=", (rb_method)weights_set_sub, 1);
112
+ rb_define_method(cEditDistanceWeights, "similar", (rb_method)weights_similar, 0);
113
+ rb_define_method(cEditDistanceWeights, "similar=",(rb_method)weights_set_similar, 1);
114
+ rb_define_method(cEditDistanceWeights, "min", (rb_method)weights_min, 0);
115
+ rb_define_method(cEditDistanceWeights, "min=", (rb_method)weights_set_min, 1);
116
+ rb_define_method(cEditDistanceWeights, "max", (rb_method)weights_max, 0);
117
+ rb_define_method(cEditDistanceWeights, "max=", (rb_method)weights_set_max, 1);
118
+ }
119
+
120
+ static VALUE aspell_limit_edit_distance(VALUE self, VALUE strA, VALUE strB, VALUE limit, VALUE weights) {
121
+ int result = aspeller::limit_edit_distance(STR2CSTR(strA), STR2CSTR(strB), NUM2INT(limit), *get_weights(weights));
122
+ return INT2FIX(result);
123
+ }
124
+
125
+ void Init_limit_edit_distance() {
126
+ rb_define_singleton_method(mAspell, "limit_edit_distance", (rb_method)aspell_limit_edit_distance, 4);
127
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef _ASPELL_EDIT_DIST_H
2
+ #define _ASPELL_EDIT_DIST_H
3
+
4
+ #include <ruby.h>
5
+
6
+ VALUE mAspell;
7
+ VALUE cEditDistanceWeights;
8
+
9
+ #endif
data/ext/extconf.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "mkmf"
2
+
3
+ have_header("ruby.h") # probe for the Ruby C API header; the result is not acted on here
4
+ have_library("stdc++") # probe for the C++ runtime library (presumably needed to link the .cpp sources)
5
+ create_makefile("aspell_edit_dist") # generate the Makefile for the "aspell_edit_dist" extension
data/ext/leditdist.cpp ADDED
@@ -0,0 +1,308 @@
1
+
2
+ #include "leditdist.hpp"
3
+
4
+ // The basic algorithm is as follows:
5
+ //
6
+ // Let A[n] represent the nth character of string A
7
+ // A[n..] represent the substring of A starting at n
8
+ // if n > length of A then it is considered an empty string
9
+ //
10
+ // edit_distance(A,B,limit) = ed(A,B,0)
11
+ // where ed(A,B,d) = d if A & B are empty.
12
+ // = infinity if d > limit
13
+ // = ed(A[2..],B[2..], d) if A[1] == B[1]
14
+ // = min ( ed(A[2..],B[2..], d+1),
15
+ // ed(A, B[2..], d+1),
16
+ // ed(A[2..],B, d+1) ) otherwise
17
+ //
18
+ // However, the code below:
19
+ // 1) Also allows for swaps
20
+ // 2) Allow weights to be attached to each edit
21
+ // 3) Is not recursive, it uses a loop when it is tail recursion
22
+ // and a small stack otherwise. The stack will NEVER be larger
23
+ // than 2 * limit.
24
+ // 4) Is extremely optimized
25
+
26
+
27
+ #define check_rest(a,b,s) \
28
+ a0 = a; b0 = b; \
29
+ while (*a0 == *b0) { \
30
+ if (*a0 == '\0') { \
31
+ if (s < min) min = s; \
32
+ break; \
33
+ } \
34
+ ++a0; ++b0; \
35
+ }
36
+
37
+ namespace aspeller {
38
+
39
+ int limit_edit_distance(const char * a, const char * b,
40
+ int limit, const EditDistanceWeights & w)
41
+ { // returns the lowest score of any completed alignment, or LARGE_NUM if none was completed
42
+ limit = limit*w.max; // rescale the caller's edit-count limit into score units
43
+ static const int size = 10; // capacity of the backtracking stack; the pushes below are not bounds-checked
44
+ struct Edit { // one deferred alternative: positions to resume from and the score accumulated so far
45
+ const char * a;
46
+ const char * b;
47
+ int score;
48
+ };
49
+ Edit begin[size];
50
+ Edit * i = begin; // next free stack slot; i == begin means the stack is empty
51
+ const char * a0; // scratch cursor written by the check_rest macro
52
+ const char * b0; // scratch cursor written by the check_rest macro
53
+ int score = 0; // score of the path currently being explored
54
+ int min = LARGE_NUM; // lowest score of any completed alignment so far
55
+
56
+ while (true) {
57
+
58
+ while (*a == *b) { // advance over identical characters at no cost
59
+ if (*a == '\0') {
60
+ if (score < min) min = score;
61
+ goto FINISH;
62
+ }
63
+ ++a; ++b;
64
+ }
65
+
66
+ if (*a == '\0') { // a is exhausted: charge del2 for every character left in b
67
+
68
+ do {
69
+ score += w.del2;
70
+ if (score >= min) goto FINISH; // cannot beat the best result, abandon this path
71
+ ++b;
72
+ } while (*b != '\0');
73
+ min = score;
74
+
75
+ } else if (*b == '\0') { // b is exhausted: charge del1 for every character left in a
76
+
77
+ do {
78
+ score += w.del1;
79
+ if (score >= min) goto FINISH; // cannot beat the best result, abandon this path
80
+ ++a;
81
+ } while (*a != '\0');
82
+ min = score;
83
+
84
+ } else {
85
+
86
+ if (score + w.max <= limit) { // when false the path is dropped: control falls through to FINISH
87
+ if (limit*w.min <= w.max*(w.min+score)) {
88
+ // if floor(score/max)=limit/max-1 then this edit is only good
89
+ // if it makes the rest of the string match. So check if
90
+ // the rest of the string matches to avoid the overhead of
91
+ // pushing it on then off the stack
92
+
93
+ // delete a character from a
94
+ check_rest(a+1,b,score + w.del1);
95
+
96
+ // delete a character from b
97
+ check_rest(a,b+1,score + w.del2);
98
+
99
+ if (*a == *(b+1) && *b == *(a+1)) {
100
+
101
+ // swap two characters
102
+ check_rest(a+2,b+2, score + w.swap);
103
+
104
+ } else {
105
+
106
+ // substitute one character for another which is the same
107
+ // thing as deleting a character from both a & b
108
+ check_rest(a+1,b+1, score + w.sub);
109
+
110
+ }
111
+
112
+ } else {
113
+
114
+ // delete a character from a
115
+ i->a = a + 1;
116
+ i->b = b;
117
+ i->score = score + w.del1;
118
+ ++i;
119
+
120
+ // delete a character from b
121
+ i->a = a;
122
+ i->b = b + 1;
123
+ i->score = score + w.del2;
124
+ ++i;
125
+
126
+ // If two characters can be swapped and make a match
127
+ // then the substitution is pointless.
128
+ // Also, there is no need to push this on the stack as
129
+ // it is going to be imminently removed.
130
+ if (*a == *(b+1) && *b == *(a+1)) {
131
+
132
+ // swap two characters
133
+ a = a + 2;
134
+ b = b + 2;
135
+ score += w.swap;
136
+ continue;
137
+
138
+ } else {
139
+
140
+ // substitute one character for another which is the same
141
+ // thing as deleting a character from both a & b
142
+ a = a + 1;
143
+ b = b + 1;
144
+ score += w.sub;
145
+ continue;
146
+
147
+ }
148
+ }
149
+ }
150
+ }
151
+ FINISH: // resume the most recently deferred alternative, or return when none are left
152
+ if (i == begin) return min;
153
+ --i;
154
+ a = i->a;
155
+ b = i->b;
156
+ score = i->score;
157
+ }
158
+ }
159
+
160
+ #undef check_rest
161
+ #define check_rest(a,b,w) \
162
+ a0 = a; b0 = b; \
163
+ while(*a0 == *b0) { \
164
+ if (*a0 == '\0') { \
165
+ if (w < min) min = w; \
166
+ break; \
167
+ } \
168
+ ++a0; \
169
+ ++b0; \
170
+ } \
171
+ if (amax < a0) amax = a0;
172
+
173
+ #define check2(a,b,w) \
174
+ aa = a; bb = b; \
175
+ while(*aa == *bb) { \
176
+ if (*aa == '\0') { \
177
+ if (amax < aa) amax = aa; \
178
+ if (w < min) min = w; \
179
+ break; \
180
+ } \
181
+ ++aa; ++bb; \
182
+ } \
183
+ if (*aa == '\0') { \
184
+ if (amax < aa) amax = aa; \
185
+ if (*bb == '\0') {} \
186
+ else if (*(bb+1) == '\0' && w+ws.del2 < min) min = w+ws.del2; \
187
+ } else if (*bb == '\0') { \
188
+ ++aa; \
189
+ if (amax < aa) amax = aa; \
190
+ if (*aa == '\0' && w+ws.del1 < min) min = w+ws.del1; \
191
+ } else { \
192
+ check_rest(aa+1,bb,w+ws.del1); \
193
+ check_rest(aa,bb+1,w+ws.del2); \
194
+ if (*aa == *(bb+1) && *bb == *(aa+1)) { \
195
+ check_rest(aa+2,bb+2,w+ws.swap); \
196
+ } else { \
197
+ check_rest(aa+1,bb+1,w+ws.sub); \
198
+ } \
199
+ }
200
+
201
+ EditDist limit1_edit_distance(const char * a, const char * b,
202
+ const EditDistanceWeights & ws)
203
+ { // returns a score together with a position inside a
204
+ int min = LARGE_NUM; // lowest score recorded by check_rest; stays LARGE_NUM when no candidate matches
205
+ const char * a0; // scratch cursor written by the check_rest macro
206
+ const char * b0; // scratch cursor written by the check_rest macro
207
+ const char * amax = a; // furthest point reached in a; raised inside check_rest
208
+
209
+ while(*a == *b) { // advance over identical characters at no cost
210
+ if (*a == '\0')
211
+ return EditDist(0, a); // strings are identical
212
+ ++a; ++b;
213
+ }
214
+
215
+ if (*a == '\0') { // a is exhausted: only a single leftover character in b is accepted
216
+
217
+ ++b;
218
+ if (*b == '\0') return EditDist(ws.del2, a);
219
+ return EditDist(LARGE_NUM, a);
220
+
221
+ } else if (*b == '\0') { // b is exhausted: only a single leftover character in a is accepted
222
+
223
+ ++a;
224
+ if (*a == '\0') return EditDist(ws.del1, a);
225
+ return EditDist(LARGE_NUM, a);
226
+
227
+ } else {
228
+
229
+ // delete a character from a
230
+ check_rest(a+1,b,ws.del1);
231
+
232
+ // delete a character from b
233
+ check_rest(a,b+1,ws.del2);
234
+
235
+ if (*a == *(b+1) && *b == *(a+1)) {
236
+
237
+ // swap two characters
238
+ check_rest(a+2,b+2,ws.swap);
239
+
240
+ } else {
241
+
242
+ // substitute one character for another which is the same
243
+ // thing as deleting a character from both a & b
244
+ check_rest(a+1,b+1,ws.sub);
245
+
246
+ }
247
+ }
248
+ return EditDist(min, amax);
249
+ }
250
+
251
+ EditDist limit2_edit_distance(const char * a, const char * b,
252
+ const EditDistanceWeights & ws)
253
+ { // returns a score together with a position inside a
254
+ int min = LARGE_NUM; // lowest score recorded by the macros; stays LARGE_NUM when no candidate matches
255
+ const char * a0; // scratch cursor written by the check_rest macro
256
+ const char * b0; // scratch cursor written by the check_rest macro
257
+ const char * aa; // scratch cursor written by the check2 macro
258
+ const char * bb; // scratch cursor written by the check2 macro
259
+ const char * amax = a; // furthest point reached in a; raised inside the macros
260
+
261
+ while(*a == *b) { // advance over identical characters at no cost
262
+ if (*a == '\0')
263
+ return EditDist(0, a); // strings are identical
264
+ ++a; ++b;
265
+ }
266
+
267
+ if (*a == '\0') { // a is exhausted: at most two leftover characters in b are accepted
268
+
269
+ ++b;
270
+ if (*b == '\0') return EditDist(ws.del2, a);
271
+ ++b;
272
+ if (*b == '\0') return EditDist(2*ws.del2, a);
273
+ return EditDist(LARGE_NUM, a);
274
+
275
+ } else if (*b == '\0') { // b is exhausted: at most two leftover characters in a are accepted
276
+
277
+ ++a;
278
+ if (*a == '\0') return EditDist(ws.del1, a);
279
+ ++a;
280
+ if (*a == '\0') return EditDist(2*ws.del1, a);
281
+ return EditDist(LARGE_NUM, a);
282
+
283
+ } else {
284
+
285
+ // delete a character from a
286
+ check2(a+1,b,ws.del1);
287
+
288
+ // delete a character from b
289
+ check2(a,b+1,ws.del2);
290
+
291
+ if (*a == *(b+1) && *b == *(a+1)) {
292
+
293
+ // swap two characters
294
+ check2(a+2,b+2,ws.swap);
295
+
296
+ } else {
297
+
298
+ // substitute one character for another which is the same
299
+ // thing as deleting a character from both a & b
300
+ check2(a+1,b+1,ws.sub);
301
+
302
+ }
303
+ }
304
+ return EditDist(min, amax);
305
+ }
306
+ }
307
+
308
+
data/ext/leditdist.hpp ADDED
@@ -0,0 +1,68 @@
1
+
2
+ #ifndef __aspeller_leditdist_hh__
3
+ #define __aspeller_leditdist_hh__
4
+
5
+ #include "weights.hpp"
6
+
7
+ namespace aspeller {
8
+
9
+ // limit_edit_distance finds the shortest edit distance but will
10
+ // stop and return a number at least as large as LARGE_NUM if it has
11
+ // to do more edits than a set limit.
12
+ // Note that this does NOT mean that the score returned is <= limit*w.max
13
+ // as "sub" vs "submarine" will return 6*(cost of insertion) no matter what
14
+ // the limit is.
15
+ // The edit distance is
16
+ // (cost of swap)(# of swaps) + (cost of deletion)(# of deletions)
17
+ // + (cost of insertion)(# of insertions)
18
+ // + (cost of substitutions)(# of substitutions)
19
+
20
+ // Preconditions:
21
+ // max(strlen(a), strlen(b))*max(of the edit weights) <= 2^15
22
+ // if violated then an incorrect result may be returned (which may be negative)
23
+ // due to overflow of a short integer
24
+ // (limit+1)*w.min < limit*w.max
25
+ // limit <= 5 (use edit_distance if limit > 5)
26
+ // where w.min and w.max is the minimum and maximum cost of an edit
27
+ // respectively.
28
+
29
+ // The running time is asymptotically bounded above by
30
+ // (3^l)*n where l is the limit and n is the maximum of strlen(a),strlen(b)
31
+ // Based on my informal tests, however, the n does not really matter
32
+ // and the running time is more like (3^l).
33
+
34
+ // limit_edit_distance, based on my informal tests, turns out to be
35
+ // faster than edit_dist for l < 5. For l == 5 it is about the
36
+ // smaller for short strings (<= 5) and less than for longer strings
37
+
38
+ // limit2_edit_distance(a,b,w) = limit_edit_distance(a,b,2,w)
39
+ // but is roughly 2/3's faster
40
+
41
+ struct EditDist {
42
+ int score;
43
+ const char * stopped_at;
44
+ EditDist() {}
45
+ EditDist(int s, const char * p)
46
+ : score(s), stopped_at(p) {}
47
+ operator int () const {return score;}
48
+ };
49
+
50
+ static const int LARGE_NUM = 0xFFFFF;
51
+ // this needs to be SMALLER than INT_MAX since it may be incremented
52
+ // a few times
53
+
54
+ int limit_edit_distance(const char * a, const char * b, int limit,
55
+ const EditDistanceWeights & w
56
+ = EditDistanceWeights());
57
+
58
+ EditDist limit1_edit_distance(const char * a, const char * b,
59
+ const EditDistanceWeights & w
60
+ = EditDistanceWeights());
61
+
62
+ EditDist limit2_edit_distance(const char * a, const char * b,
63
+ const EditDistanceWeights & w
64
+ = EditDistanceWeights());
65
+
66
+ }
67
+
68
+ #endif
data/ext/weights.hpp ADDED
@@ -0,0 +1,23 @@
1
+
2
+ #ifndef __aspeller_weights_hh__
3
+ #define __aspeller_weights_hh__
4
+
5
+ namespace aspeller {
6
+
7
// Per-operation costs consumed by the edit-distance routines.
struct EditDistanceWeights {
  // cost of deleting a char in the first string
  int del1;
  // cost of inserting a character or deleting a char in the next string
  int del2;
  // cost of swapping two adjacent letters
  int swap;
  // cost of replacing one letter with another
  int sub;
  // cost of a "similar" but not exact match for two characters
  int similar;
  // the min of del1, del2, swap and sub
  int min;
  // the max of del1, del2, swap and sub
  int max;

  // Every cost starts at 1 except `similar`, which starts at 0.
  EditDistanceWeights()
  {
    del1 = del2 = swap = sub = 1;
    similar = 0;
    min = max = 1;
  }
};
20
+
21
+ }
22
+
23
+ #endif
@@ -0,0 +1,61 @@
1
+ raise "This file should never be required. It's here only for documentation purposes."
2
+
3
+ # Module through which Aspell's edit-distance calculation is exposed.
4
+ module Aspeller
5
+
6
+ # weights that are used by Aspell to determine edit distance between two strings
7
+ class EditDistanceWeights
8
+
9
+ # the cost of deleting a char in the first string, defaults to 1
10
+ attr_accessor :del1
11
+
12
+ # the cost of inserting a character or deleting a char in the next string, defaults to 1
13
+ attr_accessor :del2
14
+
15
+ # the cost of swapping two adjacent letters, defaults to 1
16
+ attr_accessor :swap
17
+
18
+ # the cost of replacing one letter with another, defaults to 1
19
+ attr_accessor :sub
20
+
21
+ # the cost of a "similar" but not exact match for two characters, defaults to 0
22
+ attr_accessor :similar
23
+
24
+ # the min of del1, del2, swap and sub, defaults to 1
25
+ attr_accessor :min
26
+
27
+ # the max of del1, del2, swap and sub, defaults to 1
28
+ attr_accessor :max
29
+
30
+ end
31
+
32
+ # limit_edit_distance finds the shortest edit distance but will
33
+ # stop and return a number at least as large as LARGE_NUM if it has
34
+ # to do more edits than a set limit.
35
+ # Note that this does NOT mean that the score returned is <= limit*w.max
36
+ # as "sub" vs "submarine" will return 6*(cost of insertion) no matter what
37
+ # the limit is.
38
+ # The edit distance is
39
+ # (cost of swap)(# of swaps) + (cost of deletion)(# of deletions)
40
+ # + (cost of insertion)(# of insertions)
41
+ # + (cost of substitutions)(# of substitutions)
42
+ #
43
+ # Preconditions:
44
+ # max(strlen(a), strlen(b))*max(of the edit weights) <= 2^15
45
+ # if violated then an incorrect result may be returned (which may be negative)
46
+ # due to overflow of a short integer
47
+ # (limit+1)*w.min < limit*w.max
48
+ # limit <= 5 (use edit_distance if limit > 5)
49
+ # where w.min and w.max is the minimum and maximum cost of an edit
50
+ # respectively.
51
+ #
52
+ # The running time is asymptotically bounded above by
53
+ # (3^l)*n where l is the limit and n is the maximum of strlen(a),strlen(b)
54
+ # Based on my informal tests, however, the n does not really matter
55
+ # and the running time is more like (3^l).
56
+ #
57
+ # limit_edit_distance, based on my informal tests, turns out to be
58
+ # faster than edit_dist for l < 5. For l == 5 it is about the
59
+ # smaller for short strings (<= 5) and less than for longer strings
60
+ def self.limit_edit_distance(strA, strB, limit, weights); end
61
+ end
@@ -0,0 +1,26 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/test_helper.rb' )
2
+
3
# Exercises Aspeller.limit_edit_distance through the compiled extension.
class EditDistanceTest < Test::Unit::TestCase

  # The module and its module-level function must both exist.
  def test_limit_distance_defined
    assert defined?(Aspeller), "module Aspeller must be defined"
    assert defined?(Aspeller.limit_edit_distance), "method Aspeller.limit_edit_distance must be defined"
  end

  # The result must be an integer. Integer is checked instead of Fixnum so
  # the assertion also works on Rubies that no longer define Fixnum.
  def test_limit_distance_returns_fixnum
    result = Aspeller.limit_edit_distance("a", "b", 1, Aspeller::EditDistanceWeights.new)
    assert result.is_a?(Integer), "limit_edit_distance is supposed to return an integer"
  end

  # One substitution with default weights costs 1.
  def test_limit_distance_returns_right_value
    result = Aspeller.limit_edit_distance("test", "tast", 1, Aspeller::EditDistanceWeights.new)
    assert_equal 1, result
  end

  # Raising the substitution weight must be reflected in the score.
  def test_limit_distance_uses_passed_weights
    weights = Aspeller::EditDistanceWeights.new
    weights.sub = 2
    result = Aspeller.limit_edit_distance("test", "tast", 1, weights)
    assert_equal 2, result
  end
end
@@ -0,0 +1,11 @@
1
# Test bootstrap: rebuilds the native extension from scratch in the gem
# root, then loads it so the test cases can use the Aspeller module.
$LOAD_PATH << File.expand_path( File.dirname(__FILE__) + '/../ext' )
$LOAD_PATH << File.expand_path( File.dirname(__FILE__) + '/..' )

require 'test/unit'

gem_root = File.expand_path(File.join(File.dirname(__FILE__), '..'))

# Dir.chdir plus argument-list system calls avoid interpolating the path
# into a shell string, so directories containing spaces work.
Dir.chdir(gem_root) do
  # Expected to fail harmlessly when no Makefile exists yet.
  system("make", "distclean")

  # Stop right away on a failed build instead of surfacing later as a
  # confusing LoadError from the require below.
  system("ruby", "ext/extconf.rb") or raise "ext/extconf.rb failed; cannot build aspell_edit_dist"
  system("make") or raise "make failed; cannot build aspell_edit_dist"
end

require "aspell_edit_dist"
@@ -0,0 +1,51 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/test_helper.rb' )
2
+
3
# Checks the Aspeller::EditDistanceWeights wrapper: existence, constructor,
# readers, default values and writers.
class WeightsTest < Test::Unit::TestCase

  def test_weights_defined
    assert defined?(Aspeller), "Aspeller module should be defined"
    assert defined?(Aspeller::EditDistanceWeights), "class Aspeller::EditDistanceWeights is supposed to be defined"
  end

  def test_weights_has_constructor
    assert_respond_to Aspeller::EditDistanceWeights, :new
  end

  # Every weight attribute exposed by the wrapper.
  WEIGHT_METHODS = [:del1, :del2, :swap, :sub, :similar, :min, :max]

  # Each reader must exist and return a value.
  def test_weights_getters
    subject = Aspeller::EditDistanceWeights.new
    for reader in WEIGHT_METHODS
      assert_respond_to subject, reader
      assert subject.send(reader), "weights.#{reader} should return something"
    end
  end

  # A fresh object reports 1 for every weight except similar, which is 0.
  def test_weights_default_values
    subject = Aspeller::EditDistanceWeights.new
    expected = [
      [:del1, 1], [:del2, 1], [:swap, 1], [:sub, 1],
      [:similar, 0], [:min, 1], [:max, 1]
    ]
    expected.each do |reader, value|
      assert_equal value, subject.send(reader)
    end
  end

  # Each writer must exist and its value must be read back unchanged; a
  # distinct value (123, 124, ...) is used per attribute.
  def test_weights_setters
    subject = Aspeller::EditDistanceWeights.new

    WEIGHT_METHODS.each_with_index do |reader, offset|
      writer = :"#{reader}="
      value = 123 + offset

      assert_respond_to subject, writer

      subject.send(writer, value)
      assert_equal value, subject.send(reader)
    end
  end

end
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: aspell_edit_dist
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Adam Pohorecki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-29 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: adam@pohorecki.pl
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - .gitignore
26
+ - README
27
+ - Rakefile
28
+ - VERSION
29
+ - ext/aspell_edit_dist.cpp
30
+ - ext/aspell_edit_dist.h
31
+ - ext/extconf.rb
32
+ - ext/leditdist.cpp
33
+ - ext/leditdist.hpp
34
+ - ext/weights.hpp
35
+ - lib/aspell_edit_dist_stub.rb
36
+ - test/edit_distance_test.rb
37
+ - test/test_helper.rb
38
+ - test/weights_test.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/psyho/aspell_edit_dist
41
+ licenses: []
42
+
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --charset=UTF-8
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ version:
60
+ requirements: []
61
+
62
+ rubyforge_project:
63
+ rubygems_version: 1.3.5
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Gem that exposes limit_edit_distance function from Aspell.
67
+ test_files:
68
+ - test/edit_distance_test.rb
69
+ - test/weights_test.rb
70
+ - test/test_helper.rb