amatch 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,11 @@
1
1
  #
2
- ## $Id: extconf.rb,v 1.1.1.1 2004/09/27 19:23:42 flori Exp $
2
+ ## $Id: extconf.rb,v 1.2 2005/04/22 00:04:15 flori Exp $
3
3
  #
4
4
 
5
5
  require 'mkmf'
6
+ require 'rbconfig'
7
+ if CONFIG['CC'] = 'gcc'
8
+ CONFIG['CC'] = 'gcc -Wall '
9
+ end
6
10
  create_makefile 'amatch'
11
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,78 @@
1
+ #include "pair.h"
2
+
3
+ #define DEBUG 0
4
+
5
+ static int predict_length(VALUE tokens) /* total number of adjacent-character pairs all tokens yield together */
6
+ {
7
+ int i, l, result;
8
+ for (i = 0, result = 0; i < RARRAY(tokens)->len; i++) {
9
+ VALUE t = rb_ary_entry(tokens, i); /* NOTE(review): no type check here; assumes every element is a String - confirm caller */
10
+ l = RSTRING(t)->len - 1; /* a string of n characters has n - 1 adjacent pairs */
11
+ if (l > 0) result += l; /* empty and one-character tokens contribute nothing */
12
+ }
13
+ return result;
14
+ }
15
+
16
+ PairArray *PairArray_new(VALUE tokens) /* builds a PairArray holding every adjacent character pair of every token */
17
+ {
18
+ int i, j, k, len = predict_length(tokens); /* len: total pair count, computed up front to size the buffer */
19
+ PairArray *pair_array = ALLOC(PairArray);
20
+ Pair *pairs = ALLOC_N(Pair, len);
21
+ MEMZERO(pairs, Pair, len); /* start from an all-zero buffer */
22
+ pair_array->pairs = pairs;
23
+ pair_array->len = len;
24
+ for (i = 0, k = 0; i < RARRAY(tokens)->len; i++) { /* k is the running write index across all tokens */
25
+ VALUE t = rb_ary_entry(tokens, i);
26
+ char *string = RSTRING(t)->ptr;
27
+ for (j = 0; j < RSTRING(t)->len - 1; j++) { /* j is the first character of each pair, so stop one short of the end */
28
+ pairs[k].fst = string[j];
29
+ pairs[k].snd = string[j + 1];
30
+ pairs[k].status = PAIR_ACTIVE; /* every new pair starts out available for matching */
31
+ k++;
32
+ }
33
+ }
34
+ return pair_array; /* release with pair_array_destroy */
35
+ }
36
+
37
+ void pair_array_reactivate(PairArray *self) /* marks every pair active again, undoing the PAIR_INACTIVE marks left by pair_array_match */
38
+ {
39
+ int i;
40
+ for (i = 0; i < self->len; i++) {
41
+ self->pairs[i].status = PAIR_ACTIVE;
42
+ }
43
+ }
44
+
45
+ double pair_array_match(PairArray *self, PairArray *other) /* returns 2 * matches / (self->len + other->len); side effect: matched pairs of other become PAIR_INACTIVE */
46
+ {
47
+ int i, j, matches = 0;
48
+ int sum = self->len + other->len;
49
+ if (sum == 0) return 1.0; /* both arrays empty: reported as a full match, and avoids dividing by zero */
50
+ for (i = 0; i < self->len; i++) {
51
+ for (j = 0; j < other->len; j++) {
52
+ #if DEBUG
53
+ pair_print(self->pairs[i]);
54
+ putc(' ', stdout);
55
+ pair_print(other->pairs[j]);
56
+ printf(" -> %d\n", pair_equal(self->pairs[i], other->pairs[j]));
57
+ #endif
58
+ if (pair_equal(self->pairs[i], other->pairs[j])) { /* same two characters and both pairs still active */
59
+ matches++;
60
+ other->pairs[j].status = PAIR_INACTIVE; /* consume it so no later pair of self can match it again */
61
+ break; /* each pair of self matches at most one pair of other */
62
+ }
63
+ }
64
+ }
65
+ return ((double) (2 * matches)) / sum;
66
+ }
67
+
68
+ void pair_print(Pair pair) /* prints both characters and the numeric status via printf, without a trailing newline */
69
+ {
70
+ printf("%c%c (%d)", pair.fst, pair.snd, pair.status);
71
+ }
72
+
73
+ void pair_array_destroy(PairArray *pair_array) /* frees the pair buffer and then the PairArray itself */
74
+ {
75
+ xfree(pair_array->pairs); /* allocated with ALLOC_N, so release through Ruby's xfree rather than libc free */
76
+ xfree(pair_array); /* allocated with ALLOC */
77
+ }
78
+ /* vim: set et cindent sw=4 ts=4: */
@@ -0,0 +1,29 @@
1
+ #ifndef PAIR_H_INCLUDED
2
+ #define PAIR_H_INCLUDED
3
+
4
+ #include "ruby.h"
5
+
6
+ enum { PAIR_ACTIVE = 1, PAIR_INACTIVE = 2 };
7
+
8
+ typedef struct PairStruct { /* one pair of adjacent characters plus its match state */
9
+ char fst; /* first character of the pair */
10
+ char snd; /* second character of the pair */
11
+ char status; /* PAIR_ACTIVE or PAIR_INACTIVE */
12
+ char __align; /* never accessed by name in pair.c; presumably pads the struct to 4 bytes - TODO confirm */
13
+ } Pair;
14
+
15
+ typedef struct PairArrayStruct {
16
+ Pair *pairs;
17
+ int len;
18
+ } PairArray;
19
+
20
+ PairArray *PairArray_new(VALUE tokens);
21
+ #define pair_equal(a, b) \
22
+ ((a).fst == (b).fst && (a).snd == (b).snd && ((a).status & (b).status & PAIR_ACTIVE))
23
+ double pair_array_match(PairArray *self, PairArray *other);
24
+ void pair_array_destroy(PairArray *pair_array);
25
+ void pair_print(Pair pair);
26
+ void pair_array_reactivate(PairArray *self);
27
+
28
+ #endif
29
+ /* vim: set et cindent sw=4 ts=4: */
@@ -0,0 +1,24 @@
1
+ CAST2FLOAT amatch.c /^#define CAST2FLOAT(obj) \\$/
2
+ DEF_ALLOCATOR amatch.c /^#define DEF_ALLOCATOR(type) /
3
+ DEF_CONSTRUCTOR amatch.c /^#define DEF_CONSTRUCTOR(klass, type) /
4
+ DEF_ITERATE_STRINGS amatch.c /^#define DEF_ITERATE_STRINGS(type) /
5
+ DEF_PATTERN_ACCESSOR amatch.c /^#define DEF_PATTERN_ACCESSOR(type) /
6
+ DEF_RB_FREE amatch.c /^#define DEF_RB_FREE(klass, type) /
7
+ DEF_RB_READER amatch.c /^#define DEF_RB_READER(type, function, name, conver/
8
+ DEF_RB_WRITER amatch.c /^#define DEF_RB_WRITER(type, function, name, vtype,/
9
+ FLOAT2C amatch.c /^#define FLOAT2C(obj) RFLOAT(obj)->value$/
10
+ GET_STRUCT amatch.c /^#define GET_STRUCT(klass) \\$/
11
+ Init_amatch amatch.c /^void Init_amatch()$/
12
+ Levenshtein_search amatch.c /^static VALUE Levenshtein_search(Levenshtein *amatc/
13
+ PairDistance_match amatch.c /^static VALUE PairDistance_match($/
14
+ amatch_LongestSubstring amatch.c /^static VALUE amatch_LongestSubstring(General *amat/
15
+ hamming amatch.c /^static VALUE hamming(General *amatch, VALUE string/
16
+ longest_subsequence amatch.c /^static VALUE longest_subsequence(General *amatch, /
17
+ rb_Levenshtein_initialize amatch.c /^static VALUE rb_Levenshtein_initialize(VALUE self,/
18
+ rb_Levenshtein_search amatch.c /^static VALUE rb_Levenshtein_search(VALUE self, VAL/
19
+ rb_str_Levenshtein_match amatch.c /^static VALUE rb_str_Levenshtein_match(VALUE self, /
20
+ rb_str_Levenshtein_search amatch.c /^static VALUE rb_str_Levenshtein_search(VALUE self,/
21
+ rb_str_hamming amatch.c /^static VALUE rb_str_hamming(VALUE self, VALUE stri/
22
+ rb_str_longest_subsequence amatch.c /^static VALUE rb_str_longest_subsequence(VALUE self/
23
+ rb_str_longest_substring amatch.c /^static VALUE rb_str_longest_substring(VALUE self, /
24
+ rb_str_pair_distance amatch.c /^static VALUE rb_str_pair_distance(VALUE self, VALU/
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit/ui/console/testrunner'
4
+ require 'test/unit/testsuite'
5
+ $:.unshift File.expand_path(File.dirname($0))
6
+ require 'test_levenshtein'
7
+ require 'test_sellers'
8
+ require 'test_pair_distance'
9
+ require 'test_hamming'
10
+ require 'test_longest_subsequence'
11
+ require 'test_longest_substring'
12
+
13
+ class TS_AllTests
14
+ def self.suite
15
+ suite = Test::Unit::TestSuite.new 'All tests'
16
+ suite << TC_Levenshtein.suite
17
+ suite << TC_Sellers.suite
18
+ suite << TC_PairDistance.suite
19
+ suite << TC_Hamming.suite
20
+ suite << TC_LongestSubsequence.suite
21
+ suite << TC_LongestSubstring.suite
22
+ suite
23
+ end
24
+ end
25
+ Test::Unit::UI::Console::TestRunner.run(TS_AllTests)
26
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,54 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_Hamming < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @small = Hamming.new('test')
11
+ @empty = Hamming.new('')
12
+ end
13
+
14
+ def test_empty
15
+ assert_in_delta 0, @empty.match(''), D
16
+ assert_in_delta 9, @empty.match('not empty'), D
17
+ assert_in_delta 1, @empty.similar(''), D
18
+ assert_in_delta 0, @empty.similar('not empty'), D
19
+ end
20
+
21
+ def test_small_match
22
+ assert_in_delta 4, @small.match(''), D
23
+ assert_in_delta 0, @small.match('test'), D
24
+ assert_in_delta 1, @small.match('testa'), D
25
+ assert_in_delta 5, @small.match('atest'), D
26
+ assert_in_delta 3, @small.match('teast'), D
27
+ assert_in_delta 4, @small.match('est'), D
28
+ assert_in_delta 1, @small.match('tes'), D
29
+ assert_in_delta 3, @small.match('tst'), D
30
+ assert_in_delta 1, @small.match('best'), D
31
+ assert_in_delta 1, @small.match('tost'), D
32
+ assert_in_delta 1, @small.match('tesa'), D
33
+ assert_in_delta 3, @small.match('taex'), D
34
+ assert_in_delta 9, @small.match('aaatestbbb'), D
35
+ end
36
+
37
+ def test_small_similar
38
+ assert_in_delta 0.0, @small.similar(''), D
39
+ assert_in_delta 1.0, @small.similar('test'), D
40
+ assert_in_delta 0.8, @small.similar('testa'), D
41
+ assert_in_delta 0.0, @small.similar('atest'), D
42
+ assert_in_delta 0.4, @small.similar('teast'), D
43
+ assert_in_delta 0, @small.similar('est'), D
44
+ assert_in_delta 0.75, @small.similar('tes'), D
45
+ assert_in_delta 0.25, @small.similar('tst'), D
46
+ assert_in_delta 0.75, @small.similar('best'), D
47
+ assert_in_delta 0.75, @small.similar('tost'), D
48
+ assert_in_delta 0.75, @small.similar('tesa'), D
49
+ assert_in_delta 0.25, @small.similar('taex'), D
50
+ assert_in_delta 0.1, @small.similar('aaatestbbb'), D
51
+ assert_in_delta 0.8, @small.pattern.hamming_similar('testa'), D
52
+ end
53
+ end
54
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,74 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_Levenshtein < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @empty = Levenshtein.new('')
11
+ @simple = Levenshtein.new('test')
12
+ end
13
+
14
+ def test_match
15
+ assert_equal 4, @simple.match('')
16
+ assert_equal 0, @simple.match('test')
17
+ assert_equal 0, @simple.match('test')
18
+ assert_equal 1, @simple.match('testa')
19
+ assert_equal 1, @simple.match('atest')
20
+ assert_equal 1, @simple.match('teast')
21
+ assert_equal 1, @simple.match('est')
22
+ assert_equal 1, @simple.match('tes')
23
+ assert_equal 1, @simple.match('tst')
24
+ assert_equal 1, @simple.match('best')
25
+ assert_equal 1, @simple.match('tost')
26
+ assert_equal 1, @simple.match('tesa')
27
+ assert_equal 3, @simple.match('taex')
28
+ assert_equal 6, @simple.match('aaatestbbb')
29
+ end
30
+
31
+ def test_search
32
+ assert_equal 4, @simple.search('')
33
+ assert_equal 0, @empty.search('')
34
+ assert_equal 0, @empty.search('test')
35
+ assert_equal 0, @simple.search('aaatestbbb')
36
+ assert_equal 3, @simple.search('aaataexbbb')
37
+ assert_equal 4, @simple.search('aaaaaaaaa')
38
+ end
39
+
40
+ def test_array_result
41
+ assert_equal [2, 0], @simple.match(["tets", "test"])
42
+ assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
43
+ assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
44
+ end
45
+
46
+ def test_pattern_setting
47
+ assert_raises(TypeError) { @simple.pattern = :something }
48
+ assert_equal 0, @simple.match('test')
49
+ @simple.pattern = ''
50
+ assert_equal 4, @simple.match('test')
51
+ @simple.pattern = 'test'
52
+ assert_equal 0, @simple.match('test')
53
+ end
54
+
55
+ def test_similar
56
+ assert_in_delta 1, @empty.similar(''), D
57
+ assert_in_delta 0, @empty.similar('not empty'), D
58
+ assert_in_delta 0.0, @simple.similar(''), D
59
+ assert_in_delta 1.0, @simple.similar('test'), D
60
+ assert_in_delta 0.8, @simple.similar('testa'), D
61
+ assert_in_delta 0.8, @simple.similar('atest'), D
62
+ assert_in_delta 0.8, @simple.similar('teast'), D
63
+ assert_in_delta 0.75, @simple.similar('est'), D
64
+ assert_in_delta 0.75, @simple.similar('tes'), D
65
+ assert_in_delta 0.75, @simple.similar('tst'), D
66
+ assert_in_delta 0.75, @simple.similar('best'), D
67
+ assert_in_delta 0.75, @simple.similar('tost'), D
68
+ assert_in_delta 0.75, @simple.similar('tesa'), D
69
+ assert_in_delta 0.25, @simple.similar('taex'), D
70
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
71
+ assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
72
+ end
73
+ end
74
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,57 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_LongestSubsequence < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @small = LongestSubsequence.new('test')
11
+ @empty = LongestSubsequence.new('')
12
+ end
13
+
14
+ def test_empty_subsequence
15
+ assert_equal 0, @empty.match('')
16
+ assert_equal 0, @empty.match('a')
17
+ assert_equal 0, @small.match('')
18
+ assert_equal 0, @empty.match('not empty')
19
+ end
20
+
21
+ def test_small_subsequence
22
+ assert_equal 4, @small.match('test')
23
+ assert_equal 4, @small.match('testa')
24
+ assert_equal 4, @small.match('atest')
25
+ assert_equal 4, @small.match('teast')
26
+ assert_equal 3, @small.match('est')
27
+ assert_equal 3, @small.match('tes')
28
+ assert_equal 3, @small.match('tst')
29
+ assert_equal 3, @small.match('best')
30
+ assert_equal 3, @small.match('tost')
31
+ assert_equal 3, @small.match('tesa')
32
+ assert_equal 2, @small.match('taex')
33
+ assert_equal 1, @small.match('aaatbbb')
34
+ assert_equal 1, @small.match('aaasbbb')
35
+ assert_equal 4, @small.match('aaatestbbb')
36
+ end
37
+
38
+ def test_similar
39
+ assert_in_delta 1, @empty.similar(''), D
40
+ assert_in_delta 0, @empty.similar('not empty'), D
41
+ assert_in_delta 0.0, @small.similar(''), D
42
+ assert_in_delta 1.0, @small.similar('test'), D
43
+ assert_in_delta 0.8, @small.similar('testa'), D
44
+ assert_in_delta 0.8, @small.similar('atest'), D
45
+ assert_in_delta 0.8, @small.similar('teast'), D
46
+ assert_in_delta 0.75, @small.similar('est'), D
47
+ assert_in_delta 0.75, @small.similar('tes'), D
48
+ assert_in_delta 0.75, @small.similar('tst'), D
49
+ assert_in_delta 0.75, @small.similar('best'), D
50
+ assert_in_delta 0.75, @small.similar('tost'), D
51
+ assert_in_delta 0.75, @small.similar('tesa'), D
52
+ assert_in_delta 0.50, @small.similar('taex'), D
53
+ assert_in_delta 0.4, @small.similar('aaatestbbb'), D
54
+ assert_in_delta 0.75, @small.pattern.levenshtein_similar('est'), D
55
+ end
56
+ end
57
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,57 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_LongestSubstring < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @small = LongestSubstring.new('test')
11
+ @empty = LongestSubstring.new('')
12
+ end
13
+
14
+ def test_empty_substring
15
+ assert_in_delta 0, @empty.match(''), D
16
+ assert_in_delta 0, @empty.match('a'), D
17
+ assert_in_delta 0, @small.match(''), D
18
+ assert_in_delta 0, @empty.match('not empty'), D
19
+ end
20
+
21
+ def test_small_substring
22
+ assert_in_delta 4, @small.match('test'), D
23
+ assert_in_delta 4, @small.match('testa'), D
24
+ assert_in_delta 4, @small.match('atest'), D
25
+ assert_in_delta 2, @small.match('teast'), D
26
+ assert_in_delta 3, @small.match('est'), D
27
+ assert_in_delta 3, @small.match('tes'), D
28
+ assert_in_delta 2, @small.match('tst'), D
29
+ assert_in_delta 3, @small.match('best'), D
30
+ assert_in_delta 2, @small.match('tost'), D
31
+ assert_in_delta 3, @small.match('tesa'), D
32
+ assert_in_delta 1, @small.match('taex'), D
33
+ assert_in_delta 1, @small.match('aaatbbb'), D
34
+ assert_in_delta 1, @small.match('aaasbbb'), D
35
+ assert_in_delta 4, @small.match('aaatestbbb'), D
36
+ end
37
+
38
+ def test_similar
39
+ assert_in_delta 1, @empty.similar(''), D
40
+ assert_in_delta 0, @empty.similar('not empty'), D
41
+ assert_in_delta 0.0, @small.similar(''), D
42
+ assert_in_delta 1.0, @small.similar('test'), D
43
+ assert_in_delta 0.8, @small.similar('testa'), D
44
+ assert_in_delta 0.8, @small.similar('atest'), D
45
+ assert_in_delta 0.4, @small.similar('teast'), D
46
+ assert_in_delta 0.75, @small.similar('est'), D
47
+ assert_in_delta 0.75, @small.similar('tes'), D
48
+ assert_in_delta 0.5, @small.similar('tst'), D
49
+ assert_in_delta 0.75, @small.similar('best'), D
50
+ assert_in_delta 0.5, @small.similar('tost'), D
51
+ assert_in_delta 0.75, @small.similar('tesa'), D
52
+ assert_in_delta 0.25, @small.similar('taex'), D
53
+ assert_in_delta 0.4, @small.similar('aaatestbbb'), D
54
+ assert_in_delta 0.75, @small.pattern.levenshtein_similar('est'), D
55
+ end
56
+ end
57
+ # vim: set et sw=2 ts=2:
@@ -0,0 +1,81 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TC_PairDistance < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.000001
8
+
9
+ def setup
10
+ @single = PairDistance.new('test')
11
+ @empty = PairDistance.new('')
12
+ @france = PairDistance.new('republic of france')
13
+ @germany = PairDistance.new('federal republic of germany')
14
+ @csv = PairDistance.new('foo,bar,baz')
15
+ end
16
+
17
+ def test_empty
18
+ assert_in_delta 1, @empty.match(''), D
19
+ assert_in_delta 0, @empty.match('not empty'), D
20
+ assert_in_delta 1, @empty.similar(''), D
21
+ assert_in_delta 0, @empty.similar('not empty'), D
22
+ end
23
+
24
+ def test_countries
25
+ assert_in_delta 0.5555555, @france.match('france'), D
26
+ assert_in_delta 0.1052631, @france.match('germany'), D
27
+ assert_in_delta 0.4615384, @germany.match('germany'), D
28
+ assert_in_delta 0.16, @germany.match('france'), D
29
+ assert_in_delta 0.6829268,
30
+ @germany.match('german democratic republic'), D
31
+ assert_in_delta 0.72,
32
+ @france.match('french republic'), D
33
+ assert_in_delta 0.4375,
34
+ @germany.match('french republic'), D
35
+ assert_in_delta 0.5294117,
36
+ @france.match('german democratic republic'), D
37
+ end
38
+
39
+ def test_single
40
+ assert_in_delta 0, @single.match(''), D
41
+ assert_in_delta 1, @single.match('test'), D
42
+ assert_in_delta 0.8571428, @single.match('testa'), D
43
+ assert_in_delta 0.8571428, @single.match('atest'), D
44
+ assert_in_delta 0.5714285, @single.match('teast'), D
45
+ assert_in_delta 0.8, @single.match('est'), D
46
+ assert_in_delta 0.8, @single.match('tes'), D
47
+ assert_in_delta 0.4, @single.match('tst'), D
48
+ assert_in_delta 0.6666666, @single.match('best'), D
49
+ assert_in_delta 0.3333333, @single.match('tost'), D
50
+ assert_in_delta 0.6666666, @single.match('tesa'), D
51
+ assert_in_delta 0.0, @single.match('taex'), D
52
+ assert_in_delta 0.5, @single.match('aaatestbbb'), D
53
+ assert_in_delta 0.6, @single.match('aaa test bbb'), D
54
+ assert_in_delta 0.6, @single.match('test aaa bbb'), D
55
+ assert_in_delta 0.6, @single.match('bbb aaa test'), D
56
+ end
57
+
58
+ def test_csv
59
+ assert_in_delta 0, @csv.match('', /,/), D
60
+ assert_in_delta 0.5, @csv.match('foo', /,/), D
61
+ assert_in_delta 0.5, @csv.match('bar', /,/), D
62
+ assert_in_delta 0.5, @csv.match('baz', /,/), D
63
+ assert_in_delta 0.8, @csv.match('foo,bar', /,/), D
64
+ assert_in_delta 0.8, @csv.match('bar,foo', /,/), D
65
+ assert_in_delta 0.8, @csv.match('bar,baz', /,/), D
66
+ assert_in_delta 0.8, @csv.match('baz,bar', /,/), D
67
+ assert_in_delta 0.8, @csv.match('foo,baz', /,/), D
68
+ assert_in_delta 0.8, @csv.match('baz,foo', /,/), D
69
+ assert_in_delta 1, @csv.match('foo,bar,baz', /,/), D
70
+ assert_in_delta 1, @csv.match('foo,baz,bar', /,/), D
71
+ assert_in_delta 1, @csv.match('baz,foo,bar', /,/), D
72
+ assert_in_delta 1, @csv.match('baz,bar,foo', /,/), D
73
+ assert_in_delta 1, @csv.match('bar,foo,baz', /,/), D
74
+ assert_in_delta 1, @csv.match('bar,baz,foo', /,/), D
75
+ assert_in_delta 1, @csv.match('foo,bar,baz', nil), D
76
+ assert_in_delta 0.9, @csv.match('foo,baz,bar', nil), D
77
+ assert_in_delta 0.9, @csv.match('foo,baz,bar'), D
78
+ assert_in_delta 0.9, @csv.similar('foo,baz,bar'), D
79
+ end
80
+ end
81
+ # vim: set et sw=2 ts=2: