amatch 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +5 -2
- data/InstalledFiles +5 -0
- data/README.en +1 -1
- data/Rakefile +67 -58
- data/VERSION +1 -1
- data/bin/agrep.rb +65 -52
- data/config.save +12 -0
- data/ext/amatch.bundle +0 -0
- data/ext/amatch.c +1301 -225
- data/ext/extconf.rb +6 -1
- data/ext/pair.c +78 -0
- data/ext/pair.h +29 -0
- data/ext/tags +24 -0
- data/tests/runner.rb +26 -0
- data/tests/test_hamming.rb +54 -0
- data/tests/test_levenshtein.rb +74 -0
- data/tests/test_longest_subsequence.rb +57 -0
- data/tests/test_longest_substring.rb +57 -0
- data/tests/test_pair_distance.rb +81 -0
- data/tests/test_sellers.rb +94 -0
- metadata +26 -8
- data/amatch.txt.en +0 -117
- data/tests/test.rb +0 -94
data/ext/extconf.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
#
|
2
|
-
## $Id: extconf.rb,v 1.
|
2
|
+
## $Id: extconf.rb,v 1.2 2005/04/22 00:04:15 flori Exp $
|
3
3
|
#
|
4
4
|
|
5
5
|
require 'mkmf'
|
6
|
+
require 'rbconfig'
|
7
|
+
# Turn on compiler warnings when building with gcc.
#
# The condition must be a match test: a plain `=` assignment is always truthy
# and would overwrite the configured compiler with gcc on every platform.
# Matching (instead of comparing for equality) also covers values such as
# "gcc-4.0" or "/usr/bin/gcc", and appending keeps whatever command was
# configured.
if CONFIG['CC'] =~ /gcc/
  CONFIG['CC'] += ' -Wall'
end

# Write the Makefile that builds the 'amatch' extension.
create_makefile 'amatch'
|
11
|
+
# vim: set et sw=2 ts=2:
|
data/ext/pair.c
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
#include "pair.h"
|
2
|
+
|
3
|
+
#define DEBUG 0
|
4
|
+
|
5
|
+
/*
 * Compute how many Pair slots are needed for the given token array.
 *
 * Each entry of length n adds n - 1 to the total; entries whose length is
 * below two add nothing.
 *
 * tokens: Ruby Array. Every entry is read through RSTRING() without a type
 *         check, so the caller is expected to pass Strings only.
 *
 * Returns the summed pair count.
 */
static int predict_length(VALUE tokens)
{
    int total = 0;
    int idx;

    for (idx = 0; idx < RARRAY(tokens)->len; idx++) {
        VALUE token = rb_ary_entry(tokens, idx);
        int pairs = RSTRING(token)->len - 1;

        if (pairs > 0) {
            total += pairs;
        }
    }
    return total;
}
|
15
|
+
|
16
|
+
/*
 * Build a PairArray holding every pair of adjacent bytes found in the
 * given tokens.
 *
 * tokens: Ruby Array; entries are accessed through RSTRING() without a type
 *         check, so they are expected to be Strings.
 *
 * The pair storage is sized by predict_length(), zero-filled, and then
 * populated token by token; every stored pair starts out as PAIR_ACTIVE.
 * Pairs never span two tokens.
 *
 * Returns a newly allocated PairArray; release it with pair_array_destroy().
 */
PairArray *PairArray_new(VALUE tokens)
{
    int token_idx, next = 0;
    int count = predict_length(tokens);
    PairArray *result = ALLOC(PairArray);
    Pair *slots = ALLOC_N(Pair, count);

    MEMZERO(slots, Pair, count);
    result->pairs = slots;
    result->len = count;

    for (token_idx = 0; token_idx < RARRAY(tokens)->len; token_idx++) {
        VALUE token = rb_ary_entry(tokens, token_idx);
        char *bytes = RSTRING(token)->ptr;
        int pos;

        /* A token of n bytes yields the n - 1 pairs (0,1), (1,2), ... */
        for (pos = 0; pos < RSTRING(token)->len - 1; pos++, next++) {
            Pair *slot = &slots[next];

            slot->fst = bytes[pos];
            slot->snd = bytes[pos + 1];
            slot->status = PAIR_ACTIVE;
        }
    }
    return result;
}
|
36
|
+
|
37
|
+
/*
 * Reset the status of every pair in self to PAIR_ACTIVE, so that an array
 * whose pairs were marked PAIR_INACTIVE can take part in matching again.
 */
void pair_array_reactivate(PairArray *self)
{
    int remaining = self->len;

    /* The entries are independent, so walking backwards is equivalent. */
    while (remaining-- > 0) {
        self->pairs[remaining].status = PAIR_ACTIVE;
    }
}
|
44
|
+
|
45
|
+
/*
 * Score how many pairs two pair arrays have in common.
 *
 * For every pair in self, the first pair in other that compares equal
 * under pair_equal() is counted as a match and marked PAIR_INACTIVE, so it
 * cannot be matched a second time.
 *
 * Side effect: statuses in other are modified and not restored here; use
 * pair_array_reactivate() before reusing that array.
 *
 * Returns 2 * matches / (self->len + other->len), or 1.0 when both arrays
 * are empty.
 */
double pair_array_match(PairArray *self, PairArray *other)
{
    int total = self->len + other->len;
    int hits = 0;
    int s, o;

    if (total == 0) {
        return 1.0;
    }
    for (s = 0; s < self->len; s++) {
        Pair *mine = &self->pairs[s];

        for (o = 0; o < other->len; o++) {
            Pair *theirs = &other->pairs[o];
#if DEBUG
            pair_print(*mine);
            putc(' ', stdout);
            pair_print(*theirs);
            printf(" -> %d\n", pair_equal(*mine, *theirs));
#endif
            if (!pair_equal(*mine, *theirs)) {
                continue;
            }
            hits++;
            theirs->status = PAIR_INACTIVE;
            break;
        }
    }
    return ((double) (2 * hits)) / total;
}
|
67
|
+
|
68
|
+
/*
 * Debugging aid: write the two characters of pair followed by its numeric
 * status in parentheses to standard output (no trailing newline).
 */
void pair_print(Pair pair)
{
    fprintf(stdout, "%c%c (%d)", pair.fst, pair.snd, pair.status);
}
|
72
|
+
|
73
|
+
/*
 * Release a PairArray created by PairArray_new(), including its pair
 * storage.
 *
 * Both allocations were made with Ruby's ALLOC/ALLOC_N, so they are given
 * back through xfree() rather than libc free(), keeping allocator and
 * deallocator paired. Passing NULL is a no-op.
 */
void pair_array_destroy(PairArray *pair_array)
{
    if (pair_array == NULL) {
        return;
    }
    if (pair_array->pairs != NULL) {
        xfree(pair_array->pairs);
    }
    xfree(pair_array);
}
|
78
|
+
/* vim: set et cindent sw=4 ts=4: */
|
data/ext/pair.h
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef PAIR_H_INCLUDED
#define PAIR_H_INCLUDED

#include "ruby.h"

/* Values stored in Pair.status. */
enum { PAIR_ACTIVE = 1, PAIR_INACTIVE = 2 };

/* Two adjacent bytes of a token plus a status flag. */
typedef struct PairStruct {
    char fst;       /* first byte of the pair */
    char snd;       /* second byte of the pair */
    char status;    /* PAIR_ACTIVE or PAIR_INACTIVE */
    char __align;   /* presumably padding up to four bytes -- TODO confirm */
} Pair;

/* A heap-allocated sequence of pairs and its element count. */
typedef struct PairArrayStruct {
    Pair *pairs;
    int len;
} PairArray;

/*
 * Non-zero when a and b hold the same two bytes and both have the
 * PAIR_ACTIVE bit set. Evaluates its arguments more than once, so pass
 * expressions without side effects.
 */
#define pair_equal(a, b) \
    ((a).fst == (b).fst && (a).snd == (b).snd && ((a).status & (b).status & PAIR_ACTIVE))

/* Build a PairArray from a Ruby Array of tokens. */
PairArray *PairArray_new(VALUE tokens);

/* Mark every pair in self as PAIR_ACTIVE again. */
void pair_array_reactivate(PairArray *self);

/* Score the pairs shared by self and other; marks matched pairs in other. */
double pair_array_match(PairArray *self, PairArray *other);

/* Print one pair and its status to standard output. */
void pair_print(Pair pair);

/* Free a PairArray and its pair storage. */
void pair_array_destroy(PairArray *pair_array);

#endif
|
29
|
+
/* vim: set et cindent sw=4 ts=4: */
|
data/ext/tags
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
CAST2FLOAT amatch.c /^#define CAST2FLOAT(obj) \\$/
|
2
|
+
DEF_ALLOCATOR amatch.c /^#define DEF_ALLOCATOR(type) /
|
3
|
+
DEF_CONSTRUCTOR amatch.c /^#define DEF_CONSTRUCTOR(klass, type) /
|
4
|
+
DEF_ITERATE_STRINGS amatch.c /^#define DEF_ITERATE_STRINGS(type) /
|
5
|
+
DEF_PATTERN_ACCESSOR amatch.c /^#define DEF_PATTERN_ACCESSOR(type) /
|
6
|
+
DEF_RB_FREE amatch.c /^#define DEF_RB_FREE(klass, type) /
|
7
|
+
DEF_RB_READER amatch.c /^#define DEF_RB_READER(type, function, name, conver/
|
8
|
+
DEF_RB_WRITER amatch.c /^#define DEF_RB_WRITER(type, function, name, vtype,/
|
9
|
+
FLOAT2C amatch.c /^#define FLOAT2C(obj) RFLOAT(obj)->value$/
|
10
|
+
GET_STRUCT amatch.c /^#define GET_STRUCT(klass) \\$/
|
11
|
+
Init_amatch amatch.c /^void Init_amatch()$/
|
12
|
+
Levenshtein_search amatch.c /^static VALUE Levenshtein_search(Levenshtein *amatc/
|
13
|
+
PairDistance_match amatch.c /^static VALUE PairDistance_match($/
|
14
|
+
amatch_LongestSubstring amatch.c /^static VALUE amatch_LongestSubstring(General *amat/
|
15
|
+
hamming amatch.c /^static VALUE hamming(General *amatch, VALUE string/
|
16
|
+
longest_subsequence amatch.c /^static VALUE longest_subsequence(General *amatch, /
|
17
|
+
rb_Levenshtein_initialize amatch.c /^static VALUE rb_Levenshtein_initialize(VALUE self,/
|
18
|
+
rb_Levenshtein_search amatch.c /^static VALUE rb_Levenshtein_search(VALUE self, VAL/
|
19
|
+
rb_str_Levenshtein_match amatch.c /^static VALUE rb_str_Levenshtein_match(VALUE self, /
|
20
|
+
rb_str_Levenshtein_search amatch.c /^static VALUE rb_str_Levenshtein_search(VALUE self,/
|
21
|
+
rb_str_hamming amatch.c /^static VALUE rb_str_hamming(VALUE self, VALUE stri/
|
22
|
+
rb_str_longest_subsequence amatch.c /^static VALUE rb_str_longest_subsequence(VALUE self/
|
23
|
+
rb_str_longest_substring amatch.c /^static VALUE rb_str_longest_substring(VALUE self, /
|
24
|
+
rb_str_pair_distance amatch.c /^static VALUE rb_str_pair_distance(VALUE self, VALU/
|
data/tests/runner.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit/ui/console/testrunner'
require 'test/unit/testsuite'

# Make the sibling test files loadable regardless of the current directory.
$:.unshift File.expand_path(File.dirname($0))

%w[
  test_levenshtein
  test_sellers
  test_pair_distance
  test_hamming
  test_longest_subsequence
  test_longest_substring
].each { |test_file| require test_file }

# Aggregates the suites of all individual test cases into one suite.
class TS_AllTests
  def self.suite
    all_tests = Test::Unit::TestSuite.new 'All tests'
    [
      TC_Levenshtein,
      TC_Sellers,
      TC_PairDistance,
      TC_Hamming,
      TC_LongestSubsequence,
      TC_LongestSubstring,
    ].each { |test_case| all_tests << test_case.suite }
    all_tests
  end
end

Test::Unit::UI::Console::TestRunner.run(TS_AllTests)
|
26
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::Hamming: #match and #similar against the pattern 'test'
# and against the empty pattern.
class TC_Hamming < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = Hamming.new('test')
    @empty = Hamming.new('')
  end

  def test_empty
    assert_in_delta 0, @empty.match(''), D
    assert_in_delta 9, @empty.match('not empty'), D
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
  end

  def test_small_match
    # [expected result, input]
    [
      [4, ''],
      [0, 'test'],
      [1, 'testa'],
      [5, 'atest'],
      [3, 'teast'],
      [4, 'est'],
      [1, 'tes'],
      [3, 'tst'],
      [1, 'best'],
      [1, 'tost'],
      [1, 'tesa'],
      [3, 'taex'],
      [9, 'aaatestbbb'],
    ].each do |expected, input|
      assert_in_delta expected, @small.match(input), D
    end
  end

  def test_small_similar
    # [expected result, input]
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.0,  'atest'],
      [0.4,  'teast'],
      [0,    'est'],
      [0.75, 'tes'],
      [0.25, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.1,  'aaatestbbb'],
    ].each do |expected, input|
      assert_in_delta expected, @small.similar(input), D
    end
    # The same measure called directly on the pattern string.
    assert_in_delta 0.8, @small.pattern.hamming_similar('testa'), D
  end
end
|
54
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::Levenshtein: #match, #search, #similar, array arguments
# and reassignment of the pattern.
class TC_Levenshtein < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @empty = Levenshtein.new('')
    @simple = Levenshtein.new('test')
  end

  def test_match
    # [expected result, input]
    [
      [4, ''],
      [0, 'test'],
      [0, 'test'],
      [1, 'testa'],
      [1, 'atest'],
      [1, 'teast'],
      [1, 'est'],
      [1, 'tes'],
      [1, 'tst'],
      [1, 'best'],
      [1, 'tost'],
      [1, 'tesa'],
      [3, 'taex'],
      [6, 'aaatestbbb'],
    ].each do |expected, input|
      assert_equal expected, @simple.match(input)
    end
  end

  def test_search
    # [expected result, matcher, input]
    [
      [4, @simple, ''],
      [0, @empty,  ''],
      [0, @empty,  'test'],
      [0, @simple, 'aaatestbbb'],
      [3, @simple, 'aaataexbbb'],
      [4, @simple, 'aaaaaaaaa'],
    ].each do |expected, matcher, input|
      assert_equal expected, matcher.search(input)
    end
  end

  def test_array_result
    # An array argument yields one result per element.
    assert_equal([2, 0], @simple.match(["tets", "test"]))
    assert_equal([1, 0], @simple.search(["tetsaaa", "testaaa"]))
    assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
  end

  def test_pattern_setting
    # A rejected assignment must leave the previous pattern in place.
    assert_raises(TypeError) { @simple.pattern = :something }
    assert_equal 0, @simple.match('test')
    @simple.pattern = ''
    assert_equal 4, @simple.match('test')
    @simple.pattern = 'test'
    assert_equal 0, @simple.match('test')
  end

  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
    # [expected result, input]
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.8,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.75, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.4,  'aaatestbbb'],
    ].each do |expected, input|
      assert_in_delta expected, @simple.similar(input), D
    end
    # The same measure called directly on the pattern string.
    assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
  end
end
|
74
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::LongestSubsequence: #match and #similar against the
# pattern 'test' and against the empty pattern.
class TC_LongestSubsequence < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = LongestSubsequence.new('test')
    @empty = LongestSubsequence.new('')
  end

  def test_empty_subsequence
    # [matcher, input]; every combination is expected to yield 0.
    [
      [@empty, ''],
      [@empty, 'a'],
      [@small, ''],
      [@empty, 'not empty'],
    ].each do |matcher, input|
      assert_equal 0, matcher.match(input)
    end
  end

  def test_small_subsequence
    # [expected result, input]
    [
      [4, 'test'],
      [4, 'testa'],
      [4, 'atest'],
      [4, 'teast'],
      [3, 'est'],
      [3, 'tes'],
      [3, 'tst'],
      [3, 'best'],
      [3, 'tost'],
      [3, 'tesa'],
      [2, 'taex'],
      [1, 'aaatbbb'],
      [1, 'aaasbbb'],
      [4, 'aaatestbbb'],
    ].each do |expected, input|
      assert_equal expected, @small.match(input)
    end
  end

  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
    # [expected result, input]
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.8,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.75, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.50, 'taex'],
      [0.4,  'aaatestbbb'],
    ].each do |expected, input|
      assert_in_delta expected, @small.similar(input), D
    end
    # NOTE(review): this exercises String#levenshtein_similar inside the
    # longest-subsequence tests -- possibly a copy-paste from the
    # Levenshtein test case; confirm which String method was intended.
    assert_in_delta 0.75, @small.pattern.levenshtein_similar('est'), D
  end
end
|
57
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::LongestSubstring: #match and #similar against the
# pattern 'test' and against the empty pattern.
class TC_LongestSubstring < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = LongestSubstring.new('test')
    @empty = LongestSubstring.new('')
  end

  def test_empty_substring
    # [matcher, input]; every combination is expected to yield 0.
    [
      [@empty, ''],
      [@empty, 'a'],
      [@small, ''],
      [@empty, 'not empty'],
    ].each do |matcher, input|
      assert_in_delta 0, matcher.match(input), D
    end
  end

  def test_small_substring
    # [expected result, input]
    [
      [4, 'test'],
      [4, 'testa'],
      [4, 'atest'],
      [2, 'teast'],
      [3, 'est'],
      [3, 'tes'],
      [2, 'tst'],
      [3, 'best'],
      [2, 'tost'],
      [3, 'tesa'],
      [1, 'taex'],
      [1, 'aaatbbb'],
      [1, 'aaasbbb'],
      [4, 'aaatestbbb'],
    ].each do |expected, input|
      assert_in_delta expected, @small.match(input), D
    end
  end

  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
    # [expected result, input]
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.4,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.5,  'tst'],
      [0.75, 'best'],
      [0.5,  'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.4,  'aaatestbbb'],
    ].each do |expected, input|
      assert_in_delta expected, @small.similar(input), D
    end
    # NOTE(review): this exercises String#levenshtein_similar inside the
    # longest-substring tests -- possibly a copy-paste from the
    # Levenshtein test case; confirm which String method was intended.
    assert_in_delta 0.75, @small.pattern.levenshtein_similar('est'), D
  end
end
|
57
|
+
# vim: set et sw=2 ts=2:
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Tests for Amatch::PairDistance: #match and #similar on single words,
# multi-word phrases, and comma-separated input with an explicit separator
# regexp.
class TC_PairDistance < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @single  = PairDistance.new('test')
    @empty   = PairDistance.new('')
    @france  = PairDistance.new('republic of france')
    @germany = PairDistance.new('federal republic of germany')
    @csv     = PairDistance.new('foo,bar,baz')
  end

  def test_empty
    assert_in_delta 1, @empty.match(''), D
    assert_in_delta 0, @empty.match('not empty'), D
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D
  end

  def test_countries
    # [expected result, matcher, input]
    [
      [0.5555555, @france,  'france'],
      [0.1052631, @france,  'germany'],
      [0.4615384, @germany, 'germany'],
      [0.16,      @germany, 'france'],
      [0.6829268, @germany, 'german democratic republic'],
      [0.72,      @france,  'french republic'],
      [0.4375,    @germany, 'french republic'],
      [0.5294117, @france,  'german democratic republic'],
    ].each do |expected, matcher, input|
      assert_in_delta expected, matcher.match(input), D
    end
  end

  def test_single
    # [expected result, input]
    [
      [0,         ''],
      [1,         'test'],
      [0.8571428, 'testa'],
      [0.8571428, 'atest'],
      [0.5714285, 'teast'],
      [0.8,       'est'],
      [0.8,       'tes'],
      [0.4,       'tst'],
      [0.6666666, 'best'],
      [0.3333333, 'tost'],
      [0.6666666, 'tesa'],
      [0.0,       'taex'],
      [0.5,       'aaatestbbb'],
      [0.6,       'aaa test bbb'],
      [0.6,       'test aaa bbb'],
      [0.6,       'bbb aaa test'],
    ].each do |expected, input|
      assert_in_delta expected, @single.match(input), D
    end
  end

  def test_csv
    # With /,/ as the second argument: [expected result, input]
    [
      [0,   ''],
      [0.5, 'foo'],
      [0.5, 'bar'],
      [0.5, 'baz'],
      [0.8, 'foo,bar'],
      [0.8, 'bar,foo'],
      [0.8, 'bar,baz'],
      [0.8, 'baz,bar'],
      [0.8, 'foo,baz'],
      [0.8, 'baz,foo'],
      [1,   'foo,bar,baz'],
      [1,   'foo,baz,bar'],
      [1,   'baz,foo,bar'],
      [1,   'baz,bar,foo'],
      [1,   'bar,foo,baz'],
      [1,   'bar,baz,foo'],
    ].each do |expected, input|
      assert_in_delta expected, @csv.match(input, /,/), D
    end

    # With nil, or with no second argument at all, the expected score for
    # the reordered input drops to 0.9.
    assert_in_delta 1, @csv.match('foo,bar,baz', nil), D
    assert_in_delta 0.9, @csv.match('foo,baz,bar', nil), D
    assert_in_delta 0.9, @csv.match('foo,baz,bar'), D
    assert_in_delta 0.9, @csv.similar('foo,baz,bar'), D
  end
end
|
81
|
+
# vim: set et sw=2 ts=2:
|