amatch 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +5 -2
- data/InstalledFiles +5 -0
- data/README.en +1 -1
- data/Rakefile +67 -58
- data/VERSION +1 -1
- data/bin/agrep.rb +65 -52
- data/config.save +12 -0
- data/ext/amatch.bundle +0 -0
- data/ext/amatch.c +1301 -225
- data/ext/extconf.rb +6 -1
- data/ext/pair.c +78 -0
- data/ext/pair.h +29 -0
- data/ext/tags +24 -0
- data/tests/runner.rb +26 -0
- data/tests/test_hamming.rb +54 -0
- data/tests/test_levenshtein.rb +74 -0
- data/tests/test_longest_subsequence.rb +57 -0
- data/tests/test_longest_substring.rb +57 -0
- data/tests/test_pair_distance.rb +81 -0
- data/tests/test_sellers.rb +94 -0
- metadata +26 -8
- data/amatch.txt.en +0 -117
- data/tests/test.rb +0 -94
data/ext/extconf.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
#
|
2
|
-
## $Id: extconf.rb,v 1.
|
2
|
+
## $Id: extconf.rb,v 1.2 2005/04/22 00:04:15 flori Exp $
|
3
3
|
#
|
4
4
|
|
5
5
|
require 'mkmf'
|
6
|
+
require 'rbconfig'
|
7
|
+
# Turn on all compiler warnings, but only when the configured compiler is
# plain gcc. The comparison must be `==`: a single `=` would assign 'gcc' to
# CONFIG['CC'], always be truthy, and silently override any other compiler.
if CONFIG['CC'] == 'gcc'
  CONFIG['CC'] = 'gcc -Wall '
end
|
6
10
|
create_makefile 'amatch'
|
11
|
+
# vim: set et sw=2 ts=2:
|
data/ext/pair.c
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
#include "pair.h"
|
2
|
+
|
3
|
+
#define DEBUG 0
|
4
|
+
|
5
|
+
/*
 * Returns the total number of adjacent character pairs contained in the
 * entries of the Ruby array tokens: an entry of length n contributes n - 1
 * pairs, entries shorter than two bytes contribute nothing. Entries are
 * accessed through RSTRING without any type check.
 */
static int predict_length(VALUE tokens)
{
    int idx = 0, total = 0;

    while (idx < RARRAY(tokens)->len) {
        VALUE token = rb_ary_entry(tokens, idx);
        int pairs = RSTRING(token)->len - 1;

        if (pairs > 0) {
            total += pairs;
        }
        idx++;
    }
    return total;
}
|
15
|
+
|
16
|
+
/*
 * Builds a new PairArray holding every adjacent character pair of every
 * entry in the Ruby array tokens. The storage is sized up front with
 * predict_length(), zeroed, and then filled in token order; each stored pair
 * starts out as PAIR_ACTIVE. The result is heap allocated with ALLOC and
 * ALLOC_N and is returned to the caller.
 */
PairArray *PairArray_new(VALUE tokens)
{
    int capacity = predict_length(tokens);
    int t, c, next = 0;
    PairArray *result = ALLOC(PairArray);
    Pair *slots = ALLOC_N(Pair, capacity);

    MEMZERO(slots, Pair, capacity);
    result->pairs = slots;
    result->len = capacity;

    for (t = 0; t < RARRAY(tokens)->len; t++) {
        VALUE token = rb_ary_entry(tokens, t);
        char *bytes = RSTRING(token)->ptr;

        /* One pair per position that still has a successor character. */
        for (c = 0; c < RSTRING(token)->len - 1; c++, next++) {
            Pair *slot = &slots[next];

            slot->fst = bytes[c];
            slot->snd = bytes[c + 1];
            slot->status = PAIR_ACTIVE;
        }
    }
    return result;
}
|
36
|
+
|
37
|
+
/*
 * Marks every pair stored in self as PAIR_ACTIVE again.
 */
void pair_array_reactivate(PairArray *self)
{
    int remaining = self->len;
    Pair *cur = self->pairs;

    while (remaining-- > 0) {
        cur->status = PAIR_ACTIVE;
        cur++;
    }
}
|
44
|
+
|
45
|
+
/*
 * Computes 2 * matches / (self->len + other->len), where matches counts the
 * pairs of self for which an equal, still active pair was found in other.
 * Returns 1.0 when both arrays are empty.
 *
 * Side effect: every pair of other that was matched is set to PAIR_INACTIVE
 * so it cannot be matched a second time; the statuses are not restored here.
 */
double pair_array_match(PairArray *self, PairArray *other)
{
    int total = self->len + other->len;
    int hits = 0;
    int a, b;

    if (total == 0) return 1.0;

    for (a = 0; a < self->len; a++) {
        for (b = 0; b < other->len; b++) {
#if DEBUG
            pair_print(self->pairs[a]);
            putc(' ', stdout);
            pair_print(other->pairs[b]);
            printf(" -> %d\n", pair_equal(self->pairs[a], other->pairs[b]));
#endif
            if (!pair_equal(self->pairs[a], other->pairs[b])) {
                continue;
            }
            /* Consume the partner and move on to the next pair of self. */
            hits++;
            other->pairs[b].status = PAIR_INACTIVE;
            break;
        }
    }
    return ((double) (2 * hits)) / total;
}
|
67
|
+
|
68
|
+
/*
 * Writes pair to stdout as its two characters followed by the numeric
 * status in parentheses. No newline is emitted.
 */
void pair_print(Pair pair)
{
    const char first = pair.fst;
    const char second = pair.snd;
    const int state = pair.status;

    printf("%c%c (%d)", first, second, state);
}
|
72
|
+
|
73
|
+
/*
 * Releases pair_array together with its pair storage. Passing NULL is a
 * no-op.
 *
 * PairArray_new obtains both allocations through Ruby's ALLOC / ALLOC_N
 * macros, so they are handed back with xfree, the release function that
 * matches Ruby's allocator, instead of the C library's free.
 */
void pair_array_destroy(PairArray *pair_array)
{
    if (pair_array == NULL) {
        return;
    }
    xfree(pair_array->pairs);
    xfree(pair_array);
}
|
78
|
+
/* vim: set et cindent sw=4 ts=4: */
|
data/ext/pair.h
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef PAIR_H_INCLUDED
|
2
|
+
#define PAIR_H_INCLUDED
|
3
|
+
|
4
|
+
#include "ruby.h"
|
5
|
+
|
6
|
+
/*
 * Values stored in Pair.status. pair_equal only accepts two pairs whose
 * status both have the PAIR_ACTIVE bit set.
 */
enum {
    PAIR_ACTIVE   = 1,
    PAIR_INACTIVE = 2
};
|
7
|
+
|
8
|
+
/*
 * One pair of adjacent characters taken from a string, plus a status flag
 * (PAIR_ACTIVE or PAIR_INACTIVE).
 *
 * The fourth member only pads the struct to four bytes and is never read.
 * It was renamed from __align because identifiers containing a double
 * underscore are reserved for the C implementation; member order and size
 * are unchanged.
 */
typedef struct PairStruct {
    char fst;      /* first character of the pair */
    char snd;      /* second character of the pair */
    char status;   /* PAIR_ACTIVE or PAIR_INACTIVE */
    char padding;  /* unused filler byte */
} Pair;
|
14
|
+
|
15
|
+
typedef struct PairArrayStruct {
|
16
|
+
Pair *pairs;
|
17
|
+
int len;
|
18
|
+
} PairArray;
|
19
|
+
|
20
|
+
PairArray *PairArray_new(VALUE tokens);
|
21
|
+
#define pair_equal(a, b) \
|
22
|
+
((a).fst == (b).fst && (a).snd == (b).snd && ((a).status & (b).status & PAIR_ACTIVE))
|
23
|
+
double pair_array_match(PairArray *self, PairArray *other);
|
24
|
+
void pair_array_destroy(PairArray *pair_array);
|
25
|
+
void pair_print(Pair pair);
|
26
|
+
void pair_array_reactivate(PairArray *self);
|
27
|
+
|
28
|
+
#endif
|
29
|
+
/* vim: set et cindent sw=4 ts=4: */
|
data/ext/tags
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
CAST2FLOAT amatch.c /^#define CAST2FLOAT(obj) \\$/
|
2
|
+
DEF_ALLOCATOR amatch.c /^#define DEF_ALLOCATOR(type) /
|
3
|
+
DEF_CONSTRUCTOR amatch.c /^#define DEF_CONSTRUCTOR(klass, type) /
|
4
|
+
DEF_ITERATE_STRINGS amatch.c /^#define DEF_ITERATE_STRINGS(type) /
|
5
|
+
DEF_PATTERN_ACCESSOR amatch.c /^#define DEF_PATTERN_ACCESSOR(type) /
|
6
|
+
DEF_RB_FREE amatch.c /^#define DEF_RB_FREE(klass, type) /
|
7
|
+
DEF_RB_READER amatch.c /^#define DEF_RB_READER(type, function, name, conver/
|
8
|
+
DEF_RB_WRITER amatch.c /^#define DEF_RB_WRITER(type, function, name, vtype,/
|
9
|
+
FLOAT2C amatch.c /^#define FLOAT2C(obj) RFLOAT(obj)->value$/
|
10
|
+
GET_STRUCT amatch.c /^#define GET_STRUCT(klass) \\$/
|
11
|
+
Init_amatch amatch.c /^void Init_amatch()$/
|
12
|
+
Levenshtein_search amatch.c /^static VALUE Levenshtein_search(Levenshtein *amatc/
|
13
|
+
PairDistance_match amatch.c /^static VALUE PairDistance_match($/
|
14
|
+
amatch_LongestSubstring amatch.c /^static VALUE amatch_LongestSubstring(General *amat/
|
15
|
+
hamming amatch.c /^static VALUE hamming(General *amatch, VALUE string/
|
16
|
+
longest_subsequence amatch.c /^static VALUE longest_subsequence(General *amatch, /
|
17
|
+
rb_Levenshtein_initialize amatch.c /^static VALUE rb_Levenshtein_initialize(VALUE self,/
|
18
|
+
rb_Levenshtein_search amatch.c /^static VALUE rb_Levenshtein_search(VALUE self, VAL/
|
19
|
+
rb_str_Levenshtein_match amatch.c /^static VALUE rb_str_Levenshtein_match(VALUE self, /
|
20
|
+
rb_str_Levenshtein_search amatch.c /^static VALUE rb_str_Levenshtein_search(VALUE self,/
|
21
|
+
rb_str_hamming amatch.c /^static VALUE rb_str_hamming(VALUE self, VALUE stri/
|
22
|
+
rb_str_longest_subsequence amatch.c /^static VALUE rb_str_longest_subsequence(VALUE self/
|
23
|
+
rb_str_longest_substring amatch.c /^static VALUE rb_str_longest_substring(VALUE self, /
|
24
|
+
rb_str_pair_distance amatch.c /^static VALUE rb_str_pair_distance(VALUE self, VALU/
|
data/tests/runner.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit/ui/console/testrunner'
|
4
|
+
require 'test/unit/testsuite'
|
5
|
+
$:.unshift File.expand_path(File.dirname($0))
|
6
|
+
require 'test_levenshtein'
|
7
|
+
require 'test_sellers'
|
8
|
+
require 'test_pair_distance'
|
9
|
+
require 'test_hamming'
|
10
|
+
require 'test_longest_subsequence'
|
11
|
+
require 'test_longest_substring'
|
12
|
+
|
13
|
+
# Aggregates the suites of all individual test cases into one suite.
class TS_AllTests
  # Returns a Test::Unit::TestSuite named 'All tests' containing the suites
  # of every test case class, in the order listed below.
  def self.suite
    all = Test::Unit::TestSuite.new 'All tests'
    [
      TC_Levenshtein,
      TC_Sellers,
      TC_PairDistance,
      TC_Hamming,
      TC_LongestSubsequence,
      TC_LongestSubstring,
    ].each do |test_case|
      all << test_case.suite
    end
    all
  end
end
|
25
|
+
Test::Unit::UI::Console::TestRunner.run(TS_AllTests)
|
26
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::Hamming: distances (match) and similarities (similar)
# of the patterns 'test' and '' against a fixed set of strings.
class TC_Hamming < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = Hamming.new('test')
    @empty = Hamming.new('')
  end

  def test_empty
    assert_in_delta 0, @empty.match(''), D
    assert_in_delta 9, @empty.match('not empty'), D
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
  end

  def test_small_match
    [
      ['',           4],
      ['test',       0],
      ['testa',      1],
      ['atest',      5],
      ['teast',      3],
      ['est',        4],
      ['tes',        1],
      ['tst',        3],
      ['best',       1],
      ['tost',       1],
      ['tesa',       1],
      ['taex',       3],
      ['aaatestbbb', 9],
    ].each do |string, expected|
      assert_in_delta expected, @small.match(string), D
    end
  end

  def test_small_similar
    [
      ['',           0.0],
      ['test',       1.0],
      ['testa',      0.8],
      ['atest',      0.0],
      ['teast',      0.4],
      ['est',        0],
      ['tes',        0.75],
      ['tst',        0.25],
      ['best',       0.75],
      ['tost',       0.75],
      ['tesa',       0.75],
      ['taex',       0.25],
      ['aaatestbbb', 0.1],
    ].each do |string, expected|
      assert_in_delta expected, @small.similar(string), D
    end
    # The same similarity is also reachable directly from the pattern string.
    assert_in_delta 0.8, @small.pattern.hamming_similar('testa'), D
  end
end
|
54
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::Levenshtein: match, search, array arguments, changing
# the pattern, and the similar metric.
class TC_Levenshtein < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @empty = Levenshtein.new('')
    @simple = Levenshtein.new('test')
  end

  def test_match
    [
      ['',           4],
      ['test',       0],
      ['test',       0],
      ['testa',      1],
      ['atest',      1],
      ['teast',      1],
      ['est',        1],
      ['tes',        1],
      ['tst',        1],
      ['best',       1],
      ['tost',       1],
      ['tesa',       1],
      ['taex',       3],
      ['aaatestbbb', 6],
    ].each do |string, expected|
      assert_equal expected, @simple.match(string)
    end
  end

  def test_search
    assert_equal 4, @simple.search('')
    assert_equal 0, @empty.search('')
    assert_equal 0, @empty.search('test')
    assert_equal 0, @simple.search('aaatestbbb')
    assert_equal 3, @simple.search('aaataexbbb')
    assert_equal 4, @simple.search('aaaaaaaaa')
  end

  # An array argument yields an array of results; non-string entries raise.
  def test_array_result
    assert_equal [2, 0], @simple.match(["tets", "test"])
    assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
    assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
  end

  def test_pattern_setting
    # A rejected assignment must leave the old pattern in place.
    assert_raises(TypeError) { @simple.pattern = :something }
    assert_equal 0, @simple.match('test')
    @simple.pattern = ''
    assert_equal 4, @simple.match('test')
    @simple.pattern = 'test'
    assert_equal 0, @simple.match('test')
  end

  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
    [
      ['',           0.0],
      ['test',       1.0],
      ['testa',      0.8],
      ['atest',      0.8],
      ['teast',      0.8],
      ['est',        0.75],
      ['tes',        0.75],
      ['tst',        0.75],
      ['best',       0.75],
      ['tost',       0.75],
      ['tesa',       0.75],
      ['taex',       0.25],
      ['aaatestbbb', 0.4],
    ].each do |string, expected|
      assert_in_delta expected, @simple.similar(string), D
    end
    # The same similarity is also reachable directly from the pattern string.
    assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
  end
end
|
74
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::LongestSubsequence: subsequence lengths (match) and
# similarities (similar) of the patterns 'test' and '' against fixed strings.
class TC_LongestSubsequence < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = LongestSubsequence.new('test')
    @empty = LongestSubsequence.new('')
  end

  def test_empty_subsequence
    assert_equal 0, @empty.match('')
    assert_equal 0, @empty.match('a')
    assert_equal 0, @small.match('')
    assert_equal 0, @empty.match('not empty')
  end

  def test_small_subsequence
    [
      ['test',       4],
      ['testa',      4],
      ['atest',      4],
      ['teast',      4],
      ['est',        3],
      ['tes',        3],
      ['tst',        3],
      ['best',       3],
      ['tost',       3],
      ['tesa',       3],
      ['taex',       2],
      ['aaatbbb',    1],
      ['aaasbbb',    1],
      ['aaatestbbb', 4],
    ].each do |string, expected|
      assert_equal expected, @small.match(string)
    end
  end

  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
    [
      ['',           0.0],
      ['test',       1.0],
      ['testa',      0.8],
      ['atest',      0.8],
      ['teast',      0.8],
      ['est',        0.75],
      ['tes',        0.75],
      ['tst',        0.75],
      ['best',       0.75],
      ['tost',       0.75],
      ['tesa',       0.75],
      ['taex',       0.50],
      ['aaatestbbb', 0.4],
    ].each do |string, expected|
      assert_in_delta expected, @small.similar(string), D
    end
    # Exercise the String shortcut for THIS metric. The previous call to
    # levenshtein_similar was copied from the Levenshtein test and left the
    # longest-subsequence shortcut untested.
    # NOTE(review): assumes String#longest_subsequence_similar is defined by
    # the extension, mirroring hamming_similar / levenshtein_similar - confirm.
    assert_in_delta 0.75, @small.pattern.longest_subsequence_similar('est'), D
  end
end
|
57
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::LongestSubstring: substring lengths (match) and
# similarities (similar) of the patterns 'test' and '' against fixed strings.
class TC_LongestSubstring < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = LongestSubstring.new('test')
    @empty = LongestSubstring.new('')
  end

  def test_empty_substring
    assert_in_delta 0, @empty.match(''), D
    assert_in_delta 0, @empty.match('a'), D
    assert_in_delta 0, @small.match(''), D
    assert_in_delta 0, @empty.match('not empty'), D
  end

  def test_small_substring
    [
      ['test',       4],
      ['testa',      4],
      ['atest',      4],
      ['teast',      2],
      ['est',        3],
      ['tes',        3],
      ['tst',        2],
      ['best',       3],
      ['tost',       2],
      ['tesa',       3],
      ['taex',       1],
      ['aaatbbb',    1],
      ['aaasbbb',    1],
      ['aaatestbbb', 4],
    ].each do |string, expected|
      assert_in_delta expected, @small.match(string), D
    end
  end

  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
    [
      ['',           0.0],
      ['test',       1.0],
      ['testa',      0.8],
      ['atest',      0.8],
      ['teast',      0.4],
      ['est',        0.75],
      ['tes',        0.75],
      ['tst',        0.5],
      ['best',       0.75],
      ['tost',       0.5],
      ['tesa',       0.75],
      ['taex',       0.25],
      ['aaatestbbb', 0.4],
    ].each do |string, expected|
      assert_in_delta expected, @small.similar(string), D
    end
    # Exercise the String shortcut for THIS metric. The previous call to
    # levenshtein_similar was copied from the Levenshtein test and left the
    # longest-substring shortcut untested.
    # NOTE(review): assumes String#longest_substring_similar is defined by
    # the extension, mirroring hamming_similar / levenshtein_similar - confirm.
    assert_in_delta 0.75, @small.pattern.longest_substring_similar('est'), D
  end
end
|
57
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::PairDistance: match results for single words, multi-word
# phrases, and comma separated values with an explicit tokenizer regexp.
class TC_PairDistance < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @single = PairDistance.new('test')
    @empty = PairDistance.new('')
    @france = PairDistance.new('republic of france')
    @germany = PairDistance.new('federal republic of germany')
    @csv = PairDistance.new('foo,bar,baz')
  end

  def test_empty
    assert_in_delta 1, @empty.match(''), D
    assert_in_delta 0, @empty.match('not empty'), D
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
  end

  def test_countries
    [
      [0.5555555, @france,  'france'],
      [0.1052631, @france,  'germany'],
      [0.4615384, @germany, 'germany'],
      [0.16,      @germany, 'france'],
      [0.6829268, @germany, 'german democratic republic'],
      [0.72,      @france,  'french republic'],
      [0.4375,    @germany, 'french republic'],
      [0.5294117, @france,  'german democratic republic'],
    ].each do |expected, matcher, string|
      assert_in_delta expected, matcher.match(string), D
    end
  end

  def test_single
    [
      ['',             0],
      ['test',         1],
      ['testa',        0.8571428],
      ['atest',        0.8571428],
      ['teast',        0.5714285],
      ['est',          0.8],
      ['tes',          0.8],
      ['tst',          0.4],
      ['best',         0.6666666],
      ['tost',         0.3333333],
      ['tesa',         0.6666666],
      ['taex',         0.0],
      ['aaatestbbb',   0.5],
      ['aaa test bbb', 0.6],
      ['test aaa bbb', 0.6],
      ['bbb aaa test', 0.6],
    ].each do |string, expected|
      assert_in_delta expected, @single.match(string), D
    end
  end

  def test_csv
    # With /,/ as second argument the order of the fields does not matter.
    [
      ['',            0],
      ['foo',         0.5],
      ['bar',         0.5],
      ['baz',         0.5],
      ['foo,bar',     0.8],
      ['bar,foo',     0.8],
      ['bar,baz',     0.8],
      ['baz,bar',     0.8],
      ['foo,baz',     0.8],
      ['baz,foo',     0.8],
      ['foo,bar,baz', 1],
      ['foo,baz,bar', 1],
      ['baz,foo,bar', 1],
      ['baz,bar,foo', 1],
      ['bar,foo,baz', 1],
      ['bar,baz,foo', 1],
    ].each do |string, expected|
      assert_in_delta expected, @csv.match(string, /,/), D
    end
    # With nil or no second argument, reordered fields score lower.
    assert_in_delta 1, @csv.match('foo,bar,baz', nil), D
    assert_in_delta 0.9, @csv.match('foo,baz,bar', nil), D
    assert_in_delta 0.9, @csv.match('foo,baz,bar'), D
    assert_in_delta 0.9, @csv.similar('foo,baz,bar'), D
  end
end
|
81
|
+
# vim: set et sw=2 ts=2:
|