aspell_edit_dist 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ .idea/*
2
+ Makefile
3
+ *.o
4
+ *.so
5
+ mkmf.log
data/README ADDED
@@ -0,0 +1 @@
1
+ This is a very simple gem whose purpose is to expose limit_edit_distance from Aspell.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ begin # define the gem packaging tasks only when Jeweler can be loaded
2
+ require 'jeweler'
3
+ Jeweler::Tasks.new do |gemspec| # gem metadata handed to Jeweler
4
+ gemspec.name = "aspell_edit_dist"
5
+ gemspec.summary = "Gem that exposes limit_edit_distance function from Aspell."
6
+ gemspec.email = "adam@pohorecki.pl"
7
+ gemspec.homepage = "http://github.com/psyho/aspell_edit_dist"
8
+ gemspec.authors = ["Adam Pohorecki"]
9
+ end
10
+ rescue LoadError # raised by the require above when Jeweler is not installed
11
+ puts "Jeweler not available. Install it with: gem install jeweler"
12
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,127 @@
1
+ #include "aspell_edit_dist.h"
2
+ #include "weights.hpp"
3
+ #include "leditdist.hpp"
4
+
5
+ // Forward declarations
6
+ void Init_edit_distance_weights();
7
+ void Init_limit_edit_distance();
8
+
9
+ extern "C" void Init_aspell_edit_dist() { // top-level init of the extension; extern "C" keeps the symbol name unmangled
10
+ Init_edit_distance_weights(); // runs first: it assigns mAspell, which Init_limit_edit_distance reads
11
+ Init_limit_edit_distance();
12
+ }
13
+
14
+ static aspeller::EditDistanceWeights* get_weights(VALUE weights) { // unwrap the C++ weights struct held by a Ruby object
15
+ aspeller::EditDistanceWeights* unwrapped; // filled in by Data_Get_Struct on the next line
16
+ Data_Get_Struct(weights, aspeller::EditDistanceWeights, unwrapped);
17
+ return unwrapped;
18
+ }
19
+
20
+ static void weights_free(aspeller::EditDistanceWeights* obj) {
21
+ // Free callback handed to Data_Wrap_Struct in weights_init: releases the wrapped struct.
22
+ // No explicit null test is required because deleting a null pointer is a no-op in C++.
23
+ delete obj;
24
+ }
25
+
26
+ static VALUE weights_init(VALUE self) { // builds a Ruby object around a default-constructed weights struct; self is not used
27
+ aspeller::EditDistanceWeights* fresh = new aspeller::EditDistanceWeights();
28
+ return Data_Wrap_Struct(cEditDistanceWeights, 0, weights_free, fresh); // no mark function; weights_free is the free callback
29
+ }
30
+
31
+ static VALUE weights_del1(VALUE self) { // Ruby reader `del1`: returns the del1 cost stored in the wrapped struct
32
+ return INT2NUM(get_weights(self)->del1); // INT2NUM rather than INT2FIX: the field is a full C int (written with NUM2INT) and INT2FIX does no range check
33
+ }
34
+
35
+ static VALUE weights_set_del1(VALUE self, VALUE val) { // Ruby writer `del1=`: stores val in the wrapped struct
36
+ get_weights(self)->del1 = NUM2INT(val); // NUM2INT converts the Ruby number to a C int
37
+ return val; // the Ruby value passed in is returned unchanged
38
+ }
39
+
40
+ static VALUE weights_del2(VALUE self) { // Ruby reader `del2`: returns the del2 cost stored in the wrapped struct
41
+ return INT2NUM(get_weights(self)->del2); // INT2NUM rather than INT2FIX: the field is a full C int (written with NUM2INT) and INT2FIX does no range check
42
+ }
43
+
44
+ static VALUE weights_set_del2(VALUE self, VALUE val) { // Ruby writer `del2=`: stores val in the wrapped struct
45
+ get_weights(self)->del2 = NUM2INT(val); // NUM2INT converts the Ruby number to a C int
46
+ return val; // the Ruby value passed in is returned unchanged
47
+ }
48
+
49
+ static VALUE weights_swap(VALUE self) { // Ruby reader `swap`: returns the swap cost stored in the wrapped struct
50
+ return INT2NUM(get_weights(self)->swap); // INT2NUM rather than INT2FIX: the field is a full C int (written with NUM2INT) and INT2FIX does no range check
51
+ }
52
+
53
+ static VALUE weights_set_swap(VALUE self, VALUE val) { // Ruby writer `swap=`: stores val in the wrapped struct
54
+ get_weights(self)->swap = NUM2INT(val); // NUM2INT converts the Ruby number to a C int
55
+ return val; // the Ruby value passed in is returned unchanged
56
+ }
57
+
58
+ static VALUE weights_sub(VALUE self) { // Ruby reader `sub`: returns the substitution cost stored in the wrapped struct
59
+ return INT2NUM(get_weights(self)->sub); // INT2NUM rather than INT2FIX: the field is a full C int (written with NUM2INT) and INT2FIX does no range check
60
+ }
61
+
62
+ static VALUE weights_set_sub(VALUE self, VALUE val) { // Ruby writer `sub=`: stores val in the wrapped struct
63
+ get_weights(self)->sub = NUM2INT(val); // NUM2INT converts the Ruby number to a C int
64
+ return val; // the Ruby value passed in is returned unchanged
65
+ }
66
+
67
+ static VALUE weights_similar(VALUE self) { // Ruby reader `similar`: returns the similar-match cost stored in the wrapped struct
68
+ return INT2NUM(get_weights(self)->similar); // INT2NUM rather than INT2FIX: the field is a full C int (written with NUM2INT) and INT2FIX does no range check
69
+ }
70
+
71
+ static VALUE weights_set_similar(VALUE self, VALUE val) { // Ruby writer `similar=`: stores val in the wrapped struct
72
+ get_weights(self)->similar = NUM2INT(val); // NUM2INT converts the Ruby number to a C int
73
+ return val; // the Ruby value passed in is returned unchanged
74
+ }
75
+
76
+ static VALUE weights_min(VALUE self) { // Ruby reader `min`: returns the min field stored in the wrapped struct
77
+ return INT2NUM(get_weights(self)->min); // INT2NUM rather than INT2FIX: the field is a full C int (written with NUM2INT) and INT2FIX does no range check
78
+ }
79
+
80
+ static VALUE weights_set_min(VALUE self, VALUE val) { // Ruby writer `min=`: stores val in the wrapped struct
81
+ get_weights(self)->min = NUM2INT(val); // NUM2INT converts the Ruby number to a C int
82
+ return val; // the Ruby value passed in is returned unchanged
83
+ }
84
+
85
+ static VALUE weights_max(VALUE self) { // Ruby reader `max`: returns the max field stored in the wrapped struct
86
+ return INT2NUM(get_weights(self)->max); // INT2NUM rather than INT2FIX: the field is a full C int (written with NUM2INT) and INT2FIX does no range check
87
+ }
88
+
89
+ static VALUE weights_set_max(VALUE self, VALUE val) { // Ruby writer `max=`: stores val in the wrapped struct
90
+ get_weights(self)->max = NUM2INT(val); // NUM2INT converts the Ruby number to a C int
91
+ return val; // the Ruby value passed in is returned unchanged
92
+ }
93
+
94
+ typedef VALUE (*rb_method)(...);
95
+
96
+ void Init_edit_distance_weights() { // defines module Aspeller and class Aspeller::EditDistanceWeights with its accessors
97
+ mAspell = rb_define_module("Aspeller");
98
+
99
+ cEditDistanceWeights = rb_define_class_under(mAspell, "EditDistanceWeights", rb_cObject);
100
+
101
+ rb_define_method(cEditDistanceWeights, "initialize", (rb_method)weights_init, 0); // NOTE(review): weights_init is registered both as `initialize` and as the singleton `new` below
102
+ rb_define_singleton_method(cEditDistanceWeights, "new", (rb_method)weights_init, 0);
103
+
104
+ rb_define_method(cEditDistanceWeights, "del1", (rb_method)weights_del1, 0); // readers take 0 arguments, writers take 1
105
+ rb_define_method(cEditDistanceWeights, "del1=", (rb_method)weights_set_del1, 1);
106
+ rb_define_method(cEditDistanceWeights, "del2", (rb_method)weights_del2, 0);
107
+ rb_define_method(cEditDistanceWeights, "del2=", (rb_method)weights_set_del2, 1);
108
+ rb_define_method(cEditDistanceWeights, "swap", (rb_method)weights_swap, 0);
109
+ rb_define_method(cEditDistanceWeights, "swap=", (rb_method)weights_set_swap, 1);
110
+ rb_define_method(cEditDistanceWeights, "sub", (rb_method)weights_sub, 0);
111
+ rb_define_method(cEditDistanceWeights, "sub=", (rb_method)weights_set_sub, 1);
112
+ rb_define_method(cEditDistanceWeights, "similar", (rb_method)weights_similar, 0);
113
+ rb_define_method(cEditDistanceWeights, "similar=",(rb_method)weights_set_similar, 1);
114
+ rb_define_method(cEditDistanceWeights, "min", (rb_method)weights_min, 0);
115
+ rb_define_method(cEditDistanceWeights, "min=", (rb_method)weights_set_min, 1);
116
+ rb_define_method(cEditDistanceWeights, "max", (rb_method)weights_max, 0);
117
+ rb_define_method(cEditDistanceWeights, "max=", (rb_method)weights_set_max, 1);
118
+ }
119
+
120
+ static VALUE aspell_limit_edit_distance(VALUE self, VALUE strA, VALUE strB, VALUE limit, VALUE weights) { // Aspeller.limit_edit_distance(strA, strB, limit, weights): returns the score computed by aspeller::limit_edit_distance as a Ruby Integer
121
+ int result = aspeller::limit_edit_distance(StringValueCStr(strA), StringValueCStr(strB), NUM2INT(limit), *get_weights(weights)); // StringValueCStr replaces STR2CSTR, which is no longer part of the Ruby C API since 1.9; it raises for non-strings and for strings with embedded NUL bytes
122
+ return INT2NUM(result); // INT2NUM range-checks the int, unlike INT2FIX
123
+ }
124
+
125
+ void Init_limit_edit_distance() { // registers Aspeller.limit_edit_distance; relies on mAspell having been assigned already
126
+ rb_define_singleton_method(mAspell, "limit_edit_distance", (rb_method)aspell_limit_edit_distance, 4); // 4 = number of Ruby arguments
127
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef _ASPELL_EDIT_DIST_H
2
+ #define _ASPELL_EDIT_DIST_H
3
+
4
+ #include <ruby.h>
5
+
6
+ VALUE mAspell; // the Aspeller module object; assigned in Init_edit_distance_weights
7
+ VALUE cEditDistanceWeights; // the Aspeller::EditDistanceWeights class object; assigned in Init_edit_distance_weights
8
+
9
+ #endif
data/ext/extconf.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "mkmf"
2
+
3
+ have_header("ruby.h") # probe for the Ruby C API header
4
+ have_library("stdc++") # probe for the C++ runtime library so it gets linked in
5
+ create_makefile("aspell_edit_dist") # write the Makefile for the aspell_edit_dist extension
data/ext/leditdist.cpp ADDED
@@ -0,0 +1,308 @@
1
+
2
+ #include "leditdist.hpp"
3
+
4
+ // The basic algorithm is as follows:
5
+ //
6
+ // Let A[n] represent the nth character of string A
7
+ // A[n..] represent the substring of A starting at n
8
+ // if n > length of A then it is considered an empty string
9
+ //
10
+ // edit_distance(A,B,limit) = ed(A,B,0)
11
+ // where ed(A,B,d) = d if A & B is empty.
12
+ // = infinity if d > limit
13
+ // = ed(A[2..],B[2..], d) if A[1] == B[1]
14
+ // = min ( ed(A[2..],B[2..], d+1),
15
+ // ed(A, B[2..], d+1),
16
+ // ed(A[2..],B, d+1) ) otherwise
17
+ //
18
+ // However, the code below:
19
+ // 1) Also allows for swaps
20
+ // 2) Allow weights to be attached to each edit
21
+ // 3) Is not recursive, it uses a loop when it is tail recursion
22
+ // and a small stack otherwise. The stack will NEVER be larger
23
+ // than 2 * limit.
24
+ // 4) Is extremely optimized
25
+
26
+
27
+ #define check_rest(a,b,s) \
28
+ a0 = a; b0 = b; \
29
+ while (*a0 == *b0) { \
30
+ if (*a0 == '\0') { \
31
+ if (s < min) min = s; \
32
+ break; \
33
+ } \
34
+ ++a0; ++b0; \
35
+ } // check_rest(a,b,s): walks a and b in lock-step through the caller's a0/b0; if both reach the terminator together, lowers the caller's `min` to s when s is smaller
36
+
37
+ namespace aspeller {
38
+
39
+ int limit_edit_distance(const char * a, const char * b,
40
+ int limit, const EditDistanceWeights & w)
41
+ { // Returns the lowest weighted edit score found between the NUL-terminated strings a and b, or LARGE_NUM when none was found; `limit` is the maximum number of edits and `w` holds the per-edit costs
42
+ limit = limit*w.max; // from here on `limit` is a score budget rather than an edit count
43
+ static const int size = 10;
44
+ struct Edit {
45
+ const char * a;
46
+ const char * b;
47
+ int score;
48
+ };
49
+ Edit begin[size]; // fixed-size stack of postponed alternatives
50
+ Edit * i = begin; // next free slot; i == begin means the stack is empty
51
+ const char * a0;
52
+ const char * b0;
53
+ int score = 0;
54
+ int min = LARGE_NUM;
55
+
56
+ while (true) {
57
+
58
+ while (*a == *b) {
59
+ if (*a == '\0') {
60
+ if (score < min) min = score;
61
+ goto FINISH;
62
+ }
63
+ ++a; ++b;
64
+ }
65
+
66
+ if (*a == '\0') {
67
+
68
+ do {
69
+ score += w.del2;
70
+ if (score >= min) goto FINISH;
71
+ ++b;
72
+ } while (*b != '\0');
73
+ min = score;
74
+
75
+ } else if (*b == '\0') {
76
+
77
+ do {
78
+ score += w.del1;
79
+ if (score >= min) goto FINISH;
80
+ ++a;
81
+ } while (*a != '\0');
82
+ min = score;
83
+
84
+ } else {
85
+
86
+ if (score + w.max <= limit) {
87
+ if (limit*w.min <= w.max*(w.min+score) || (begin + size) - i < 2) {
88
+ // if floor(score/max)=limit/max-1 then this edit is only good
89
+ // if it makes the rest of the string match. So check if
90
+ // the rest of the string matches to avoid the overhead of
91
+ // pushing it on then off the stack
92
+ // This path is also taken when fewer than two stack slots remain, so the pushes in the else branch can never overrun begin[].
93
+ // delete a character from a
94
+ check_rest(a+1,b,score + w.del1);
95
+
96
+ // delete a character from b
97
+ check_rest(a,b+1,score + w.del2);
98
+
99
+ if (*a == *(b+1) && *b == *(a+1)) {
100
+
101
+ // swap two characters
102
+ check_rest(a+2,b+2, score + w.swap);
103
+
104
+ } else {
105
+
106
+ // substitute one character for another which is the same
107
+ // thing as deleting a character from both a & b
108
+ check_rest(a+1,b+1, score + w.sub);
109
+
110
+ }
111
+
112
+ } else {
113
+
114
+ // delete a character from a
115
+ i->a = a + 1;
116
+ i->b = b;
117
+ i->score = score + w.del1;
118
+ ++i;
119
+
120
+ // delete a character from b
121
+ i->a = a;
122
+ i->b = b + 1;
123
+ i->score = score + w.del2;
124
+ ++i;
125
+
126
+ // If two characters can be swapped and make a match
127
+ // then the substitution is pointless.
128
+ // Also, there is no need to push this on the stack as
129
+ // it is going to be imminently removed.
130
+ if (*a == *(b+1) && *b == *(a+1)) {
131
+
132
+ // swap two characters
133
+ a = a + 2;
134
+ b = b + 2;
135
+ score += w.swap;
136
+ continue;
137
+
138
+ } else {
139
+
140
+ // substitute one character for another which is the same
141
+ // thing as deleting a character from both a & b
142
+ a = a + 1;
143
+ b = b + 1;
144
+ score += w.sub;
145
+ continue;
146
+
147
+ }
148
+ }
149
+ }
150
+ }
151
+ FINISH:
152
+ if (i == begin) return min; // nothing left to explore
153
+ --i; // pop the most recently postponed alternative and resume from it
154
+ a = i->a;
155
+ b = i->b;
156
+ score = i->score;
157
+ }
158
+ }
159
+
160
+ #undef check_rest
161
+ #define check_rest(a,b,w) \
162
+ a0 = a; b0 = b; \
163
+ while(*a0 == *b0) { \
164
+ if (*a0 == '\0') { \
165
+ if (w < min) min = w; \
166
+ break; \
167
+ } \
168
+ ++a0; \
169
+ ++b0; \
170
+ } \
171
+ if (amax < a0) amax = a0; // redefinition for limit1/limit2: same lock-step comparison, with w as the candidate score, plus it advances the caller's `amax` to the furthest position reached in a
172
+
173
+ #define check2(a,b,w) \
174
+ aa = a; bb = b; \
175
+ while(*aa == *bb) { \
176
+ if (*aa == '\0') { \
177
+ if (amax < aa) amax = aa; \
178
+ if (w < min) min = w; \
179
+ break; \
180
+ } \
181
+ ++aa; ++bb; \
182
+ } \
183
+ if (*aa == '\0') { \
184
+ if (amax < aa) amax = aa; \
185
+ if (*bb == '\0') {} \
186
+ else if (*(bb+1) == '\0' && w+ws.del2 < min) min = w+ws.del2; \
187
+ } else if (*bb == '\0') { \
188
+ ++aa; \
189
+ if (amax < aa) amax = aa; \
190
+ if (*aa == '\0' && w+ws.del1 < min) min = w+ws.del1; \
191
+ } else { \
192
+ check_rest(aa+1,bb,w+ws.del1); \
193
+ check_rest(aa,bb+1,w+ws.del2); \
194
+ if (*aa == *(bb+1) && *bb == *(aa+1)) { \
195
+ check_rest(aa+2,bb+2,w+ws.swap); \
196
+ } else { \
197
+ check_rest(aa+1,bb+1,w+ws.sub); \
198
+ } \
199
+ } // check2(a,b,w): having already spent score w, skips the common prefix of a and b, then tries one further edit (delete from either side, swap or substitute) and lowers the caller's `min` when the strings then match; uses the caller's aa, bb, a0, b0, amax, min and ws
200
+
201
+ EditDist limit1_edit_distance(const char * a, const char * b,
202
+ const EditDistanceWeights & ws)
203
+ { // Scores a against b allowing a single edit; the result carries the score (LARGE_NUM when no single edit makes them equal) and a position inside a
204
+ int min = LARGE_NUM;
205
+ const char * a0;
206
+ const char * b0;
207
+ const char * amax = a; // furthest position in a reached so far; advanced by check_rest
208
+
209
+ while(*a == *b) { // skip the common prefix
210
+ if (*a == '\0')
211
+ return EditDist(0, a); // identical strings
212
+ ++a; ++b;
213
+ }
214
+
215
+ if (*a == '\0') { // a ran out first: only exactly one extra character in b is accepted
216
+
217
+ ++b;
218
+ if (*b == '\0') return EditDist(ws.del2, a);
219
+ return EditDist(LARGE_NUM, a);
220
+
221
+ } else if (*b == '\0') { // b ran out first: only exactly one extra character in a is accepted
222
+
223
+ ++a;
224
+ if (*a == '\0') return EditDist(ws.del1, a);
225
+ return EditDist(LARGE_NUM, a);
226
+
227
+ } else {
228
+
229
+ // delete a character from a
230
+ check_rest(a+1,b,ws.del1);
231
+
232
+ // delete a character from b
233
+ check_rest(a,b+1,ws.del2);
234
+
235
+ if (*a == *(b+1) && *b == *(a+1)) {
236
+
237
+ // swap two characters
238
+ check_rest(a+2,b+2,ws.swap);
239
+
240
+ } else {
241
+
242
+ // substitute one character for another which is the same
243
+ // thing as deleting a character from both a & b
244
+ check_rest(a+1,b+1,ws.sub);
245
+
246
+ }
247
+ }
248
+ return EditDist(min, amax);
249
+ }
250
+
251
+ EditDist limit2_edit_distance(const char * a, const char * b,
252
+ const EditDistanceWeights & ws)
253
+ { // Scores a against b allowing up to two edits; the result carries the score (LARGE_NUM when no such edits make them equal) and a position inside a
254
+ int min = LARGE_NUM;
255
+ const char * a0; // scratch pointers used by check_rest
256
+ const char * b0;
257
+ const char * aa; // scratch pointers used by check2
258
+ const char * bb;
259
+ const char * amax = a; // furthest position in a reached so far; advanced by check2/check_rest
260
+
261
+ while(*a == *b) { // skip the common prefix
262
+ if (*a == '\0')
263
+ return EditDist(0, a); // identical strings
264
+ ++a; ++b;
265
+ }
266
+
267
+ if (*a == '\0') { // a ran out first: one or two extra characters in b are accepted
268
+
269
+ ++b;
270
+ if (*b == '\0') return EditDist(ws.del2, a);
271
+ ++b;
272
+ if (*b == '\0') return EditDist(2*ws.del2, a);
273
+ return EditDist(LARGE_NUM, a);
274
+
275
+ } else if (*b == '\0') { // b ran out first: one or two extra characters in a are accepted
276
+
277
+ ++a;
278
+ if (*a == '\0') return EditDist(ws.del1, a);
279
+ ++a;
280
+ if (*a == '\0') return EditDist(2*ws.del1, a);
281
+ return EditDist(LARGE_NUM, a);
282
+
283
+ } else {
284
+
285
+ // delete a character from a
286
+ check2(a+1,b,ws.del1);
287
+
288
+ // delete a character from b
289
+ check2(a,b+1,ws.del2);
290
+
291
+ if (*a == *(b+1) && *b == *(a+1)) {
292
+
293
+ // swap two characters
294
+ check2(a+2,b+2,ws.swap);
295
+
296
+ } else {
297
+
298
+ // substitute one character for another which is the same
299
+ // thing as deleting a character from both a & b
300
+ check2(a+1,b+1,ws.sub);
301
+
302
+ }
303
+ }
304
+ return EditDist(min, amax);
305
+ }
306
+ }
307
+
308
+
data/ext/leditdist.hpp ADDED
@@ -0,0 +1,68 @@
1
+
2
+ #ifndef __aspeller_leditdist_hh__
3
+ #define __aspeller_leditdist_hh__
4
+
5
+ #include "weights.hpp"
6
+
7
+ namespace aspeller {
8
+
9
+ // limit_edit_distance finds the shortest edit distance but will
10
+ // stop and return a number at least as large as LARGE_NUM if it has
11
+ // to do more edits than a set limit.
12
+ // Note that this does NOT mean that the score returned is <= limit*w.max
13
+ // as "sub" vs "submarine" will return 6*(cost of insertion) no matter what
14
+ // the limit is.
15
+ // The edit distance is
16
+ // (cost of swap)(# of swaps) + (cost of deletion)(# of deletions)
17
+ // + (cost of insertion)(# of insertions)
18
+ // + (cost of substitutions)(# of substitutions)
19
+
20
+ // Preconditions:
21
+ // max(strlen(a), strlen(b))*max(of the edit weights) <= 2^15
22
+ // if violated then an incorrect result may be returned (which may be negative)
23
+ // due to overflow of a short integer
24
+ // (limit+1)*w.min < limit*w.max
25
+ // limit <= 5 (use edit_distance if limit > 5)
26
+ // where w.min and w.max is the minimum and maximum cost of an edit
27
+ // respectively.
28
+
29
+ // The running time is asymptotically bounded above by
30
+ // (3^l)*n where l is the limit and n is the maximum of strlen(a),strlen(b)
31
+ // Based on my informal tests, however, the n does not really matter
32
+ // and the running time is more like (3^l).
33
+
34
+ // limit_edit_distance, based on my informal tests, turns out to be
35
+ // faster than edit_dist for l < 5. For l == 5 it is about the
36
+ // smaller for short strings (<= 5) and less than for longer strings
37
+
38
+ // limit2_edit_distance(a,b,w) = limit_edit_distance(a,b,2,w)
39
+ // but is roughly 2/3's faster
40
+
41
+ struct EditDist { // result of limit1/limit2_edit_distance: a score plus a position in the first string
42
+ int score;
43
+ const char * stopped_at;
44
+ EditDist() : score(0), stopped_at(0) {} // zero-initialise so a default-constructed value never exposes indeterminate members
45
+ EditDist(int s, const char * p)
46
+ : score(s), stopped_at(p) {}
47
+ operator int () const {return score;} // lets an EditDist be used directly where the int score is expected
48
+ };
49
+
50
+ static const int LARGE_NUM = 0xFFFFF;
51
+ // this needs to be SMALLER than INT_MAX since it may be incremented
52
+ // a few times
53
+
54
+ int limit_edit_distance(const char * a, const char * b, int limit,
55
+ const EditDistanceWeights & w
56
+ = EditDistanceWeights());
57
+
58
+ EditDist limit1_edit_distance(const char * a, const char * b,
59
+ const EditDistanceWeights & w
60
+ = EditDistanceWeights());
61
+
62
+ EditDist limit2_edit_distance(const char * a, const char * b,
63
+ const EditDistanceWeights & w
64
+ = EditDistanceWeights());
65
+
66
+ }
67
+
68
+ #endif
data/ext/weights.hpp ADDED
@@ -0,0 +1,23 @@
1
+
2
+ #ifndef __aspeller_weights_hh__
3
+ #define __aspeller_weights_hh__
4
+
5
+ namespace aspeller {
6
+
7
+ struct EditDistanceWeights { // per-edit costs consumed by the limit*_edit_distance functions
8
+ int del1; // the cost of deleting a char in the first string
9
+ int del2; // the cost of inserting a character or deleting a char
10
+ // in the next string
11
+ int swap; // the cost of swapping two adjacent letters
12
+ int sub; // the cost of replacing one letter with another
13
+ int similar; // the cost of a "similar" but not exact match for
14
+ // two characters
15
+ int min; // the min of del1, del2, swap and sub.
16
+ int max; // the max of del1, del2, swap and sub.
17
+ EditDistanceWeights() // defaults: every edit costs 1 and a "similar" match costs 0
18
+ : del1(1), del2(1), swap(1), sub(1), similar(0), min(1), max(1) {} // min and max are plain fields: nothing here recomputes them when the other costs change
19
+ };
20
+
21
+ }
22
+
23
+ #endif
@@ -0,0 +1,61 @@
1
+ raise "This file should never be required. It's here only for documentation purposes."
2
+
3
+ # Module through which the edit distance calculation is exposed.
4
+ module Aspeller
5
+
6
+ # weights that are used by Aspell to determine edit distance between two strings
7
+ class EditDistanceWeights
8
+
9
+ # the cost of deleting a char in the first string, defaults to 1
10
+ attr_accessor :del1
11
+
12
+ # the cost of inserting a character or deleting a char in the next string, defaults to 1
13
+ attr_accessor :del2
14
+
15
+ # the cost of swapping two adjacent letters, defaults to 1
16
+ attr_accessor :swap
17
+
18
+ # the cost of replacing one letter with another, defaults to 1
19
+ attr_accessor :sub
20
+
21
+ # the cost of a "similar" but not exact match for two characters, defaults to 0
22
+ attr_accessor :similar
23
+
24
+ # the min of del1, del2, swap and sub, defaults to 1
25
+ attr_accessor :min
26
+
27
+ # the max of del1, del2, swap and sub, defaults to 1
28
+ attr_accessor :max
29
+
30
+ end
31
+
32
+ # limit_edit_distance finds the shortest edit distance but will
33
+ # stop and return a number at least as large as LARGE_NUM if it has
34
+ # to do more edits than a set limit.
35
+ # Note that this does NOT mean that the score returned is <= limit*w.max
36
+ # as "sub" vs "submarine" will return 6*(cost of insertion) no matter what
37
+ # the limit is.
38
+ # The edit distance is
39
+ # (cost of swap)(# of swaps) + (cost of deletion)(# of deletions)
40
+ # + (cost of insertion)(# of insertions)
41
+ # + (cost of substitutions)(# of substitutions)
42
+ #
43
+ # Preconditions:
44
+ # max(strlen(a), strlen(b))*max(of the edit weights) <= 2^15
45
+ # if violated then an incorrect result may be returned (which may be negative)
46
+ # due to overflow of a short integer
47
+ # (limit+1)*w.min < limit*w.max
48
+ # limit <= 5 (use edit_distance if limit > 5)
49
+ # where w.min and w.max is the minimum and maximum cost of an edit
50
+ # respectively.
51
+ #
52
+ # The running time is asymptotically bounded above by
53
+ # (3^l)*n where l is the limit and n is the maximum of strlen(a),strlen(b)
54
+ # Based on my informal tests, however, the n does not really matter
55
+ # and the running time is more like (3^l).
56
+ #
57
+ # limit_edit_distance, based on my informal tests, turns out to be
58
+ # faster than edit_dist for l < 5. For l == 5 it is about the
59
+ # smaller for short strings (<= 5) and less than for longer strings
60
+ def self.limit_edit_distance(strA, strB, limit, weights); end
61
+ end
@@ -0,0 +1,26 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/test_helper.rb' )
2
+
3
+ class EditDistanceTest < Test::Unit::TestCase
4
+
5
+ def test_limit_distance_defined
6
+ assert defined?(Aspeller), "module Aspell must be defined"
7
+ assert defined?(Aspeller.limit_edit_distance), "method Aspell.limit_edit_distance must be defined"
8
+ end
9
+
10
+ def test_limit_distance_returns_fixnum
11
+ result = Aspeller.limit_edit_distance("a", "b", 1, Aspeller::EditDistanceWeights.new)
12
+ assert result.is_a?(Fixnum), "limit_edit_distance is supposed to return a fixnum"
13
+ end
14
+
15
+ def test_limit_distance_returns_right_value
16
+ result = Aspeller.limit_edit_distance("test", "tast", 1, Aspeller::EditDistanceWeights.new)
17
+ assert_equal 1, result
18
+ end
19
+
20
+ def test_limit_distance_uses_passed_weights
21
+ weights = Aspeller::EditDistanceWeights.new
22
+ weights.sub = 2
23
+ result = Aspeller.limit_edit_distance("test", "tast", 1, weights)
24
+ assert_equal 2, result
25
+ end
26
+ end
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH << File.expand_path( File.dirname(__FILE__) + '/../ext' ) # make the ext directory requirable
2
+ $LOAD_PATH << File.expand_path( File.dirname(__FILE__) + '/..' ) # make the directory above the tests requirable
3
+
4
+ require 'test/unit'
5
+
6
+ system("cd #{File.expand_path(File.join(File.dirname(__FILE__), '..'))}; make distclean")
7
+
8
+ system("cd #{File.expand_path(File.join(File.dirname(__FILE__), '..'))}; ruby ext/extconf.rb")
9
+ system("cd #{File.expand_path(File.join(File.dirname(__FILE__), '..'))}; make")
10
+
11
+ require "aspell_edit_dist" # load the extension after the clean / configure / make commands above
@@ -0,0 +1,51 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/test_helper.rb' )
2
+
3
+ class WeightsTest < Test::Unit::TestCase # checks the Aspeller::EditDistanceWeights accessors exposed by the extension
4
+
5
+ def test_weights_defined
6
+ assert defined?(Aspeller), "Aspeller module should be defined"
7
+ assert defined?(Aspeller::EditDistanceWeights), "class Aspeller::EditDistanceWeights is supposed to be defined"
8
+ end
9
+
10
+ def test_weights_has_constructor
11
+ assert_respond_to Aspeller::EditDistanceWeights, :new
12
+ end
13
+
14
+ WEIGHT_METHODS = [:del1, :del2, :swap, :sub, :similar, :min, :max] # reader names iterated by the getter and setter tests
15
+
16
+ def test_weights_getters
17
+ weights = Aspeller::EditDistanceWeights.new
18
+ WEIGHT_METHODS.each do |method_name|
19
+ assert_respond_to weights, method_name
20
+ assert weights.send(method_name), "weights.#{method_name} should return something"
21
+ end
22
+ end
23
+
24
+ def test_weights_default_values
25
+ weights = Aspeller::EditDistanceWeights.new
26
+ assert_equal 1, weights.del1
27
+ assert_equal 1, weights.del2
28
+ assert_equal 1, weights.swap
29
+ assert_equal 1, weights.sub
30
+ assert_equal 0, weights.similar
31
+ assert_equal 1, weights.min
32
+ assert_equal 1, weights.max
33
+ end
34
+
35
+ def test_weights_setters
36
+ weights = Aspeller::EditDistanceWeights.new
37
+
38
+ n = 123 # starting value; incremented per field so each setter is checked with a distinct number
39
+ WEIGHT_METHODS.each do |method_name|
40
+ setter_name = :"#{method_name}="
41
+
42
+ assert_respond_to weights, setter_name
43
+
44
+ weights.send(setter_name, n)
45
+ assert_equal n, weights.send(method_name)
46
+
47
+ n += 1
48
+ end
49
+ end
50
+
51
+ end
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: aspell_edit_dist
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Adam Pohorecki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-29 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: adam@pohorecki.pl
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - .gitignore
26
+ - README
27
+ - Rakefile
28
+ - VERSION
29
+ - ext/aspell_edit_dist.cpp
30
+ - ext/aspell_edit_dist.h
31
+ - ext/extconf.rb
32
+ - ext/leditdist.cpp
33
+ - ext/leditdist.hpp
34
+ - ext/weights.hpp
35
+ - lib/aspell_edit_dist_stub.rb
36
+ - test/edit_distance_test.rb
37
+ - test/test_helper.rb
38
+ - test/weights_test.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/psyho/aspell_edit_dist
41
+ licenses: []
42
+
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --charset=UTF-8
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ version:
60
+ requirements: []
61
+
62
+ rubyforge_project:
63
+ rubygems_version: 1.3.5
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Gem that exposes limit_edit_distance function from Aspell.
67
+ test_files:
68
+ - test/edit_distance_test.rb
69
+ - test/weights_test.rb
70
+ - test/test_helper.rb