amatch 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,11 @@
1
1
  #
2
- ## $Id: extconf.rb,v 1.1.1.1 2004/09/27 19:23:42 flori Exp $
2
+ ## $Id: extconf.rb,v 1.2 2005/04/22 00:04:15 flori Exp $
3
3
  #
4
4
 
5
5
  require 'mkmf'
6
+ require 'rbconfig'
7
+ if CONFIG['CC'] = 'gcc'
8
+ CONFIG['CC'] = 'gcc -Wall '
9
+ end
6
10
  create_makefile 'amatch'
11
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,78 @@
1
+ #include "pair.h"
2
+
3
+ #define DEBUG 0
4
+
5
+ static int predict_length(VALUE tokens)
6
+ {
7
+ int i, l, result;
8
+ for (i = 0, result = 0; i < RARRAY(tokens)->len; i++) {
9
+ VALUE t = rb_ary_entry(tokens, i);
10
+ l = RSTRING(t)->len - 1;
11
+ if (l > 0) result += l;
12
+ }
13
+ return result;
14
+ }
15
+
16
+ PairArray *PairArray_new(VALUE tokens)
17
+ {
18
+ int i, j, k, len = predict_length(tokens);
19
+ PairArray *pair_array = ALLOC(PairArray);
20
+ Pair *pairs = ALLOC_N(Pair, len);
21
+ MEMZERO(pairs, Pair, len);
22
+ pair_array->pairs = pairs;
23
+ pair_array->len = len;
24
+ for (i = 0, k = 0; i < RARRAY(tokens)->len; i++) {
25
+ VALUE t = rb_ary_entry(tokens, i);
26
+ char *string = RSTRING(t)->ptr;
27
+ for (j = 0; j < RSTRING(t)->len - 1; j++) {
28
+ pairs[k].fst = string[j];
29
+ pairs[k].snd = string[j + 1];
30
+ pairs[k].status = PAIR_ACTIVE;
31
+ k++;
32
+ }
33
+ }
34
+ return pair_array;
35
+ }
36
+
37
+ void pair_array_reactivate(PairArray *self)
38
+ {
39
+ int i;
40
+ for (i = 0; i < self->len; i++) {
41
+ self->pairs[i].status = PAIR_ACTIVE;
42
+ }
43
+ }
44
+
45
+ double pair_array_match(PairArray *self, PairArray *other)
46
+ {
47
+ int i, j, matches = 0;
48
+ int sum = self->len + other->len;
49
+ if (sum == 0) return 1.0;
50
+ for (i = 0; i < self->len; i++) {
51
+ for (j = 0; j < other->len; j++) {
52
+ #if DEBUG
53
+ pair_print(self->pairs[i]);
54
+ putc(' ', stdout);
55
+ pair_print(other->pairs[j]);
56
+ printf(" -> %d\n", pair_equal(self->pairs[i], other->pairs[j]));
57
+ #endif
58
+ if (pair_equal(self->pairs[i], other->pairs[j])) {
59
+ matches++;
60
+ other->pairs[j].status = PAIR_INACTIVE;
61
+ break;
62
+ }
63
+ }
64
+ }
65
+ return ((double) (2 * matches)) / sum;
66
+ }
67
+
68
+ void pair_print(Pair pair)
69
+ {
70
+ printf("%c%c (%d)", pair.fst, pair.snd, pair.status);
71
+ }
72
+
73
+ void pair_array_destroy(PairArray *pair_array) /* Releases the pairs buffer and then the PairArray itself. */
74
+ {
75
+ xfree(pair_array->pairs); /* allocated with ALLOC_N in PairArray_new, so release with xfree, not free */
76
+ xfree(pair_array); /* the container came from ALLOC; same rule applies */
77
+ }
78
+ /* vim: set et cindent sw=4 ts=4: */
@@ -0,0 +1,29 @@
1
+ #ifndef PAIR_H_INCLUDED
2
+ #define PAIR_H_INCLUDED
3
+
4
+ #include "ruby.h"
5
+
6
+ enum { PAIR_ACTIVE = 1, PAIR_INACTIVE = 2 };
7
+
8
+ typedef struct PairStruct {
9
+ char fst;
10
+ char snd;
11
+ char status;
12
+ char __align;
13
+ } Pair;
14
+
15
+ typedef struct PairArrayStruct {
16
+ Pair *pairs;
17
+ int len;
18
+ } PairArray;
19
+
20
+ PairArray *PairArray_new(VALUE tokens);
21
+ #define pair_equal(a, b) \
22
+ ((a).fst == (b).fst && (a).snd == (b).snd && ((a).status & (b).status & PAIR_ACTIVE))
23
+ double pair_array_match(PairArray *self, PairArray *other);
24
+ void pair_array_destroy(PairArray *pair_array);
25
+ void pair_print(Pair pair);
26
+ void pair_array_reactivate(PairArray *self);
27
+
28
+ #endif
29
+ /* vim: set et cindent sw=4 ts=4: */
@@ -0,0 +1,24 @@
1
+ CAST2FLOAT amatch.c /^#define CAST2FLOAT(obj) \\$/
2
+ DEF_ALLOCATOR amatch.c /^#define DEF_ALLOCATOR(type) /
3
+ DEF_CONSTRUCTOR amatch.c /^#define DEF_CONSTRUCTOR(klass, type) /
4
+ DEF_ITERATE_STRINGS amatch.c /^#define DEF_ITERATE_STRINGS(type) /
5
+ DEF_PATTERN_ACCESSOR amatch.c /^#define DEF_PATTERN_ACCESSOR(type) /
6
+ DEF_RB_FREE amatch.c /^#define DEF_RB_FREE(klass, type) /
7
+ DEF_RB_READER amatch.c /^#define DEF_RB_READER(type, function, name, conver/
8
+ DEF_RB_WRITER amatch.c /^#define DEF_RB_WRITER(type, function, name, vtype,/
9
+ FLOAT2C amatch.c /^#define FLOAT2C(obj) RFLOAT(obj)->value$/
10
+ GET_STRUCT amatch.c /^#define GET_STRUCT(klass) \\$/
11
+ Init_amatch amatch.c /^void Init_amatch()$/
12
+ Levenshtein_search amatch.c /^static VALUE Levenshtein_search(Levenshtein *amatc/
13
+ PairDistance_match amatch.c /^static VALUE PairDistance_match($/
14
+ amatch_LongestSubstring amatch.c /^static VALUE amatch_LongestSubstring(General *amat/
15
+ hamming amatch.c /^static VALUE hamming(General *amatch, VALUE string/
16
+ longest_subsequence amatch.c /^static VALUE longest_subsequence(General *amatch, /
17
+ rb_Levenshtein_initialize amatch.c /^static VALUE rb_Levenshtein_initialize(VALUE self,/
18
+ rb_Levenshtein_search amatch.c /^static VALUE rb_Levenshtein_search(VALUE self, VAL/
19
+ rb_str_Levenshtein_match amatch.c /^static VALUE rb_str_Levenshtein_match(VALUE self, /
20
+ rb_str_Levenshtein_search amatch.c /^static VALUE rb_str_Levenshtein_search(VALUE self,/
21
+ rb_str_hamming amatch.c /^static VALUE rb_str_hamming(VALUE self, VALUE stri/
22
+ rb_str_longest_subsequence amatch.c /^static VALUE rb_str_longest_subsequence(VALUE self/
23
+ rb_str_longest_substring amatch.c /^static VALUE rb_str_longest_substring(VALUE self, /
24
+ rb_str_pair_distance amatch.c /^static VALUE rb_str_pair_distance(VALUE self, VALU/
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit/ui/console/testrunner'
4
+ require 'test/unit/testsuite'
5
+ $:.unshift File.expand_path(File.dirname($0))
6
+ require 'test_levenshtein'
7
+ require 'test_sellers'
8
+ require 'test_pair_distance'
9
+ require 'test_hamming'
10
+ require 'test_longest_subsequence'
11
+ require 'test_longest_substring'
12
+
13
+ class TS_AllTests
14
+ def self.suite
15
+ suite = Test::Unit::TestSuite.new 'All tests'
16
+ suite << TC_Levenshtein.suite
17
+ suite << TC_Sellers.suite
18
+ suite << TC_PairDistance.suite
19
+ suite << TC_Hamming.suite
20
+ suite << TC_LongestSubsequence.suite
21
+ suite << TC_LongestSubstring.suite
22
+ suite
23
+ end
24
+ end
25
+ Test::Unit::UI::Console::TestRunner.run(TS_AllTests)
26
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,54 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_Hamming < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @small = Hamming.new('test')
11
+ @empty = Hamming.new('')
12
+ end
13
+
14
+ def test_empty
15
+ assert_in_delta 0, @empty.match(''), D
16
+ assert_in_delta 9, @empty.match('not empty'), D
17
+ assert_in_delta 1, @empty.similar(''), D
18
+ assert_in_delta 0, @empty.similar('not empty'), D
19
+ end
20
+
21
+ def test_small_match
22
+ assert_in_delta 4, @small.match(''), D
23
+ assert_in_delta 0, @small.match('test'), D
24
+ assert_in_delta 1, @small.match('testa'), D
25
+ assert_in_delta 5, @small.match('atest'), D
26
+ assert_in_delta 3, @small.match('teast'), D
27
+ assert_in_delta 4, @small.match('est'), D
28
+ assert_in_delta 1, @small.match('tes'), D
29
+ assert_in_delta 3, @small.match('tst'), D
30
+ assert_in_delta 1, @small.match('best'), D
31
+ assert_in_delta 1, @small.match('tost'), D
32
+ assert_in_delta 1, @small.match('tesa'), D
33
+ assert_in_delta 3, @small.match('taex'), D
34
+ assert_in_delta 9, @small.match('aaatestbbb'), D
35
+ end
36
+
37
+ def test_small_similar
38
+ assert_in_delta 0.0, @small.similar(''), D
39
+ assert_in_delta 1.0, @small.similar('test'), D
40
+ assert_in_delta 0.8, @small.similar('testa'), D
41
+ assert_in_delta 0.0, @small.similar('atest'), D
42
+ assert_in_delta 0.4, @small.similar('teast'), D
43
+ assert_in_delta 0, @small.similar('est'), D
44
+ assert_in_delta 0.75, @small.similar('tes'), D
45
+ assert_in_delta 0.25, @small.similar('tst'), D
46
+ assert_in_delta 0.75, @small.similar('best'), D
47
+ assert_in_delta 0.75, @small.similar('tost'), D
48
+ assert_in_delta 0.75, @small.similar('tesa'), D
49
+ assert_in_delta 0.25, @small.similar('taex'), D
50
+ assert_in_delta 0.1, @small.similar('aaatestbbb'), D
51
+ assert_in_delta 0.8, @small.pattern.hamming_similar('testa'), D
52
+ end
53
+ end
54
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,74 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_Levenshtein < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @empty = Levenshtein.new('')
11
+ @simple = Levenshtein.new('test')
12
+ end
13
+
14
+ def test_match
15
+ assert_equal 4, @simple.match('')
16
+ assert_equal 0, @simple.match('test')
17
+ assert_equal 0, @simple.match('test')
18
+ assert_equal 1, @simple.match('testa')
19
+ assert_equal 1, @simple.match('atest')
20
+ assert_equal 1, @simple.match('teast')
21
+ assert_equal 1, @simple.match('est')
22
+ assert_equal 1, @simple.match('tes')
23
+ assert_equal 1, @simple.match('tst')
24
+ assert_equal 1, @simple.match('best')
25
+ assert_equal 1, @simple.match('tost')
26
+ assert_equal 1, @simple.match('tesa')
27
+ assert_equal 3, @simple.match('taex')
28
+ assert_equal 6, @simple.match('aaatestbbb')
29
+ end
30
+
31
+ def test_search
32
+ assert_equal 4, @simple.search('')
33
+ assert_equal 0, @empty.search('')
34
+ assert_equal 0, @empty.search('test')
35
+ assert_equal 0, @simple.search('aaatestbbb')
36
+ assert_equal 3, @simple.search('aaataexbbb')
37
+ assert_equal 4, @simple.search('aaaaaaaaa')
38
+ end
39
+
40
+ def test_array_result
41
+ assert_equal [2, 0], @simple.match(["tets", "test"])
42
+ assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
43
+ assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
44
+ end
45
+
46
+ def test_pattern_setting
47
+ assert_raises(TypeError) { @simple.pattern = :something }
48
+ assert_equal 0, @simple.match('test')
49
+ @simple.pattern = ''
50
+ assert_equal 4, @simple.match('test')
51
+ @simple.pattern = 'test'
52
+ assert_equal 0, @simple.match('test')
53
+ end
54
+
55
+ def test_similar
56
+ assert_in_delta 1, @empty.similar(''), D
57
+ assert_in_delta 0, @empty.similar('not empty'), D
58
+ assert_in_delta 0.0, @simple.similar(''), D
59
+ assert_in_delta 1.0, @simple.similar('test'), D
60
+ assert_in_delta 0.8, @simple.similar('testa'), D
61
+ assert_in_delta 0.8, @simple.similar('atest'), D
62
+ assert_in_delta 0.8, @simple.similar('teast'), D
63
+ assert_in_delta 0.75, @simple.similar('est'), D
64
+ assert_in_delta 0.75, @simple.similar('tes'), D
65
+ assert_in_delta 0.75, @simple.similar('tst'), D
66
+ assert_in_delta 0.75, @simple.similar('best'), D
67
+ assert_in_delta 0.75, @simple.similar('tost'), D
68
+ assert_in_delta 0.75, @simple.similar('tesa'), D
69
+ assert_in_delta 0.25, @simple.similar('taex'), D
70
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
71
+ assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
72
+ end
73
+ end
74
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,57 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_LongestSubsequence < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @small = LongestSubsequence.new('test')
11
+ @empty = LongestSubsequence.new('')
12
+ end
13
+
14
+ def test_empty_subsequence
15
+ assert_equal 0, @empty.match('')
16
+ assert_equal 0, @empty.match('a')
17
+ assert_equal 0, @small.match('')
18
+ assert_equal 0, @empty.match('not empty')
19
+ end
20
+
21
+ def test_small_subsequence
22
+ assert_equal 4, @small.match('test')
23
+ assert_equal 4, @small.match('testa')
24
+ assert_equal 4, @small.match('atest')
25
+ assert_equal 4, @small.match('teast')
26
+ assert_equal 3, @small.match('est')
27
+ assert_equal 3, @small.match('tes')
28
+ assert_equal 3, @small.match('tst')
29
+ assert_equal 3, @small.match('best')
30
+ assert_equal 3, @small.match('tost')
31
+ assert_equal 3, @small.match('tesa')
32
+ assert_equal 2, @small.match('taex')
33
+ assert_equal 1, @small.match('aaatbbb')
34
+ assert_equal 1, @small.match('aaasbbb')
35
+ assert_equal 4, @small.match('aaatestbbb')
36
+ end
37
+
38
+ def test_similar
39
+ assert_in_delta 1, @empty.similar(''), D
40
+ assert_in_delta 0, @empty.similar('not empty'), D
41
+ assert_in_delta 0.0, @small.similar(''), D
42
+ assert_in_delta 1.0, @small.similar('test'), D
43
+ assert_in_delta 0.8, @small.similar('testa'), D
44
+ assert_in_delta 0.8, @small.similar('atest'), D
45
+ assert_in_delta 0.8, @small.similar('teast'), D
46
+ assert_in_delta 0.75, @small.similar('est'), D
47
+ assert_in_delta 0.75, @small.similar('tes'), D
48
+ assert_in_delta 0.75, @small.similar('tst'), D
49
+ assert_in_delta 0.75, @small.similar('best'), D
50
+ assert_in_delta 0.75, @small.similar('tost'), D
51
+ assert_in_delta 0.75, @small.similar('tesa'), D
52
+ assert_in_delta 0.50, @small.similar('taex'), D
53
+ assert_in_delta 0.4, @small.similar('aaatestbbb'), D
54
+ assert_in_delta 0.75, @small.pattern.levenshtein_similar('est'), D
55
+ end
56
+ end
57
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,57 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_LongestSubstring < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @small = LongestSubstring.new('test')
11
+ @empty = LongestSubstring.new('')
12
+ end
13
+
14
+ def test_empty_substring
15
+ assert_in_delta 0, @empty.match(''), D
16
+ assert_in_delta 0, @empty.match('a'), D
17
+ assert_in_delta 0, @small.match(''), D
18
+ assert_in_delta 0, @empty.match('not empty'), D
19
+ end
20
+
21
+ def test_small_substring
22
+ assert_in_delta 4, @small.match('test'), D
23
+ assert_in_delta 4, @small.match('testa'), D
24
+ assert_in_delta 4, @small.match('atest'), D
25
+ assert_in_delta 2, @small.match('teast'), D
26
+ assert_in_delta 3, @small.match('est'), D
27
+ assert_in_delta 3, @small.match('tes'), D
28
+ assert_in_delta 2, @small.match('tst'), D
29
+ assert_in_delta 3, @small.match('best'), D
30
+ assert_in_delta 2, @small.match('tost'), D
31
+ assert_in_delta 3, @small.match('tesa'), D
32
+ assert_in_delta 1, @small.match('taex'), D
33
+ assert_in_delta 1, @small.match('aaatbbb'), D
34
+ assert_in_delta 1, @small.match('aaasbbb'), D
35
+ assert_in_delta 4, @small.match('aaatestbbb'), D
36
+ end
37
+
38
+ def test_similar
39
+ assert_in_delta 1, @empty.similar(''), D
40
+ assert_in_delta 0, @empty.similar('not empty'), D
41
+ assert_in_delta 0.0, @small.similar(''), D
42
+ assert_in_delta 1.0, @small.similar('test'), D
43
+ assert_in_delta 0.8, @small.similar('testa'), D
44
+ assert_in_delta 0.8, @small.similar('atest'), D
45
+ assert_in_delta 0.4, @small.similar('teast'), D
46
+ assert_in_delta 0.75, @small.similar('est'), D
47
+ assert_in_delta 0.75, @small.similar('tes'), D
48
+ assert_in_delta 0.5, @small.similar('tst'), D
49
+ assert_in_delta 0.75, @small.similar('best'), D
50
+ assert_in_delta 0.5, @small.similar('tost'), D
51
+ assert_in_delta 0.75, @small.similar('tesa'), D
52
+ assert_in_delta 0.25, @small.similar('taex'), D
53
+ assert_in_delta 0.4, @small.similar('aaatestbbb'), D
54
+ assert_in_delta 0.75, @small.pattern.levenshtein_similar('est'), D
55
+ end
56
+ end
57
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,81 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_PairDistance < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @single = PairDistance.new('test')
11
+ @empty = PairDistance.new('')
12
+ @france = PairDistance.new('republic of france')
13
+ @germany = PairDistance.new('federal republic of germany')
14
+ @csv = PairDistance.new('foo,bar,baz')
15
+ end
16
+
17
+ def test_empty
18
+ assert_in_delta 1, @empty.match(''), D
19
+ assert_in_delta 0, @empty.match('not empty'), D
20
+ assert_in_delta 1, @empty.similar(''), D
21
+ assert_in_delta 0, @empty.similar('not empty'), D
22
+ end
23
+
24
+ def test_countries
25
+ assert_in_delta 0.5555555, @france.match('france'), D
26
+ assert_in_delta 0.1052631, @france.match('germany'), D
27
+ assert_in_delta 0.4615384, @germany.match('germany'), D
28
+ assert_in_delta 0.16, @germany.match('france'), D
29
+ assert_in_delta 0.6829268,
30
+ @germany.match('german democratic republic'), D
31
+ assert_in_delta 0.72,
32
+ @france.match('french republic'), D
33
+ assert_in_delta 0.4375,
34
+ @germany.match('french republic'), D
35
+ assert_in_delta 0.5294117,
36
+ @france.match('german democratic republic'), D
37
+ end
38
+
39
+ def test_single
40
+ assert_in_delta 0, @single.match(''), D
41
+ assert_in_delta 1, @single.match('test'), D
42
+ assert_in_delta 0.8571428, @single.match('testa'), D
43
+ assert_in_delta 0.8571428, @single.match('atest'), D
44
+ assert_in_delta 0.5714285, @single.match('teast'), D
45
+ assert_in_delta 0.8, @single.match('est'), D
46
+ assert_in_delta 0.8, @single.match('tes'), D
47
+ assert_in_delta 0.4, @single.match('tst'), D
48
+ assert_in_delta 0.6666666, @single.match('best'), D
49
+ assert_in_delta 0.3333333, @single.match('tost'), D
50
+ assert_in_delta 0.6666666, @single.match('tesa'), D
51
+ assert_in_delta 0.0, @single.match('taex'), D
52
+ assert_in_delta 0.5, @single.match('aaatestbbb'), D
53
+ assert_in_delta 0.6, @single.match('aaa test bbb'), D
54
+ assert_in_delta 0.6, @single.match('test aaa bbb'), D
55
+ assert_in_delta 0.6, @single.match('bbb aaa test'), D
56
+ end
57
+
58
+ def test_csv
59
+ assert_in_delta 0, @csv.match('', /,/), D
60
+ assert_in_delta 0.5, @csv.match('foo', /,/), D
61
+ assert_in_delta 0.5, @csv.match('bar', /,/), D
62
+ assert_in_delta 0.5, @csv.match('baz', /,/), D
63
+ assert_in_delta 0.8, @csv.match('foo,bar', /,/), D
64
+ assert_in_delta 0.8, @csv.match('bar,foo', /,/), D
65
+ assert_in_delta 0.8, @csv.match('bar,baz', /,/), D
66
+ assert_in_delta 0.8, @csv.match('baz,bar', /,/), D
67
+ assert_in_delta 0.8, @csv.match('foo,baz', /,/), D
68
+ assert_in_delta 0.8, @csv.match('baz,foo', /,/), D
69
+ assert_in_delta 1, @csv.match('foo,bar,baz', /,/), D
70
+ assert_in_delta 1, @csv.match('foo,baz,bar', /,/), D
71
+ assert_in_delta 1, @csv.match('baz,foo,bar', /,/), D
72
+ assert_in_delta 1, @csv.match('baz,bar,foo', /,/), D
73
+ assert_in_delta 1, @csv.match('bar,foo,baz', /,/), D
74
+ assert_in_delta 1, @csv.match('bar,baz,foo', /,/), D
75
+ assert_in_delta 1, @csv.match('foo,bar,baz', nil), D
76
+ assert_in_delta 0.9, @csv.match('foo,baz,bar', nil), D
77
+ assert_in_delta 0.9, @csv.match('foo,baz,bar'), D
78
+ assert_in_delta 0.9, @csv.similar('foo,baz,bar'), D
79
+ end
80
+ end
81
+ # vim: set et sw=2 ts=2: