amatch 0.2.5-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,25 @@
1
#ifndef COMMON_H_INCLUDED
#define COMMON_H_INCLUDED

/*
 * Fallback definitions of Ruby's string/array/float accessor macros.
 *
 * Each macro is defined only when nothing included earlier already provides
 * it (presumably because an older ruby.h lacks it -- confirm against the Ruby
 * versions this extension targets). The fallbacks read the struct members
 * directly through RSTRING()/RARRAY()/RFLOAT().
 *
 * The include guard follows the same naming as pair.h's PAIR_H_INCLUDED;
 * identifiers containing a double underscore are reserved for the
 * implementation and must not be used for guards.
 */

/* Character data of a String object. */
#ifndef RSTRING_PTR
#define RSTRING_PTR(str) (RSTRING(str)->ptr)
#endif

/* Length field of a String object. */
#ifndef RSTRING_LEN
#define RSTRING_LEN(str) (RSTRING(str)->len)
#endif

/* Element storage of an Array object. */
#ifndef RARRAY_PTR
#define RARRAY_PTR(ary) (RARRAY(ary)->ptr)
#endif

/* Length field of an Array object. */
#ifndef RARRAY_LEN
#define RARRAY_LEN(ary) (RARRAY(ary)->len)
#endif

/* Value field of a Float object. */
#ifndef RFLOAT_VALUE
#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
#endif

#endif /* COMMON_H_INCLUDED */
@@ -0,0 +1,6 @@
1
+ require 'mkmf'
2
+ require 'rbconfig'
3
# When the configured compiler is exactly 'gcc', switch on all warnings.
CONFIG['CC'] = 'gcc -Wall ' if CONFIG['CC'] == 'gcc'

# Write the Makefile for the 'amatch' extension.
create_makefile 'amatch'
@@ -0,0 +1,77 @@
1
+ #include "pair.h"
2
+
3
+ #define DEBUG 0
4
+
5
+ static int predict_length(VALUE tokens)
6
+ {
7
+ int i, l, result;
8
+ for (i = 0, result = 0; i < RARRAY_LEN(tokens); i++) {
9
+ VALUE t = rb_ary_entry(tokens, i);
10
+ l = RSTRING_LEN(t) - 1;
11
+ if (l > 0) result += l;
12
+ }
13
+ return result;
14
+ }
15
+
16
+ PairArray *PairArray_new(VALUE tokens)
17
+ {
18
+ int i, j, k, len = predict_length(tokens);
19
+ PairArray *pair_array = ALLOC(PairArray);
20
+ Pair *pairs = ALLOC_N(Pair, len);
21
+ MEMZERO(pairs, Pair, len);
22
+ pair_array->pairs = pairs;
23
+ pair_array->len = len;
24
+ for (i = 0, k = 0; i < RARRAY_LEN(tokens); i++) {
25
+ VALUE t = rb_ary_entry(tokens, i);
26
+ char *string = RSTRING_PTR(t);
27
+ for (j = 0; j < RSTRING_LEN(t) - 1; j++) {
28
+ pairs[k].fst = string[j];
29
+ pairs[k].snd = string[j + 1];
30
+ pairs[k].status = PAIR_ACTIVE;
31
+ k++;
32
+ }
33
+ }
34
+ return pair_array;
35
+ }
36
+
37
+ void pair_array_reactivate(PairArray *self)
38
+ {
39
+ int i;
40
+ for (i = 0; i < self->len; i++) {
41
+ self->pairs[i].status = PAIR_ACTIVE;
42
+ }
43
+ }
44
+
45
+ double pair_array_match(PairArray *self, PairArray *other)
46
+ {
47
+ int i, j, matches = 0;
48
+ int sum = self->len + other->len;
49
+ if (sum == 0) return 1.0;
50
+ for (i = 0; i < self->len; i++) {
51
+ for (j = 0; j < other->len; j++) {
52
+ #if DEBUG
53
+ pair_print(self->pairs[i]);
54
+ putc(' ', stdout);
55
+ pair_print(other->pairs[j]);
56
+ printf(" -> %d\n", pair_equal(self->pairs[i], other->pairs[j]));
57
+ #endif
58
+ if (pair_equal(self->pairs[i], other->pairs[j])) {
59
+ matches++;
60
+ other->pairs[j].status = PAIR_INACTIVE;
61
+ break;
62
+ }
63
+ }
64
+ }
65
+ return ((double) (2 * matches)) / sum;
66
+ }
67
+
68
+ void pair_print(Pair pair)
69
+ {
70
+ printf("%c%c (%d)", pair.fst, pair.snd, pair.status);
71
+ }
72
+
73
+ void pair_array_destroy(PairArray *pair_array)
74
+ {
75
+ free(pair_array->pairs);
76
+ free(pair_array);
77
+ }
@@ -0,0 +1,29 @@
1
+ #ifndef PAIR_H_INCLUDED
2
+ #define PAIR_H_INCLUDED
3
+
4
+ #include "ruby.h"
5
+ #include "common.h"
6
+
7
+ enum { PAIR_ACTIVE = 1, PAIR_INACTIVE = 2 };
8
+
9
+ typedef struct PairStruct {
10
+ char fst;
11
+ char snd;
12
+ char status;
13
+ char __align;
14
+ } Pair;
15
+
16
+ typedef struct PairArrayStruct {
17
+ Pair *pairs;
18
+ int len;
19
+ } PairArray;
20
+
21
+ PairArray *PairArray_new(VALUE tokens);
22
+ #define pair_equal(a, b) \
23
+ ((a).fst == (b).fst && (a).snd == (b).snd && ((a).status & (b).status & PAIR_ACTIVE))
24
+ double pair_array_match(PairArray *self, PairArray *other);
25
+ void pair_array_destroy(PairArray *pair_array);
26
+ void pair_print(Pair pair);
27
+ void pair_array_reactivate(PairArray *self);
28
+
29
+ #endif
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
require 'rbconfig'
# RbConfig is the supported name of the configuration module; the old
# top-level Config alias is deprecated and absent from newer Rubies.
include RbConfig
require 'fileutils'
include FileUtils::Verbose

# Build tool: honour $MAKE, otherwise take the first of gmake/make that runs.
MAKE = ENV['MAKE'] || %w[gmake make].find { |c| system(c, '-v') }
MAKE or abort ' *** No make program found (set the MAKE environment variable).'

bindir  = CONFIG['bindir']
archdir = CONFIG['sitearchdir']
libdir  = CONFIG['sitelibdir']
dlext   = CONFIG['DLEXT']

# Configure and build the C extension, then copy the shared object into the
# architecture-specific site directory.
cd 'ext' do
  system 'ruby extconf.rb' or exit 1
  system "#{MAKE}" or exit 1
  mkdir_p archdir
  install "amatch.#{dlext}", archdir
end

# Install the command line tool.
cd 'bin' do
  install('agrep.rb', bindir)
end

# Install the pure-Ruby version file below sitelibdir/amatch.
cd 'lib/amatch' do
  mkdir_p d = File.join(libdir, 'amatch')
  install 'version.rb', d
end
warn " *** Installed amatch extension."
Binary file
@@ -0,0 +1,8 @@
1
module Amatch
  # Amatch version
  VERSION       = '0.2.5'
  # Numeric components of VERSION, in major/minor/build order.
  VERSION_ARRAY = VERSION.split('.').map { |part| part.to_i } # :nodoc:
  VERSION_MAJOR = VERSION_ARRAY.at(0) # :nodoc:
  VERSION_MINOR = VERSION_ARRAY.at(1) # :nodoc:
  VERSION_BUILD = VERSION_ARRAY.at(2) # :nodoc:
end
@@ -0,0 +1,58 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
# Checks Amatch::Hamming#match and #similar for an empty, a short and a long
# pattern.
class TestHamming < Test::Unit::TestCase
  include Amatch

  # Tolerance for the floating point comparisons.
  D = 0.000001

  def setup
    @empty = Hamming.new('')
    @small = Hamming.new('test')
    @long  = Hamming.new('A' * 160)
  end

  def test_empty
    [[0, ''], [9, 'not empty']].each do |expected, text|
      assert_in_delta expected, @empty.match(text), D
    end
    [[1, ''], [0, 'not empty']].each do |expected, text|
      assert_in_delta expected, @empty.similar(text), D
    end
  end

  def test_small_match
    # [expected distance, text compared against the pattern 'test']
    [
      [4, ''],
      [0, 'test'],
      [1, 'testa'],
      [5, 'atest'],
      [3, 'teast'],
      [4, 'est'],
      [1, 'tes'],
      [3, 'tst'],
      [1, 'best'],
      [1, 'tost'],
      [1, 'tesa'],
      [3, 'taex'],
      [9, 'aaatestbbb']
    ].each do |expected, text|
      assert_in_delta expected, @small.match(text), D
    end
  end

  def test_small_similar
    # [expected similarity, text compared against the pattern 'test']
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.0,  'atest'],
      [0.4,  'teast'],
      [0,    'est'],
      [0.75, 'tes'],
      [0.25, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.1,  'aaatestbbb']
    ].each do |expected, text|
      assert_in_delta expected, @small.similar(text), D
    end
    # The same measure through the String extension method.
    assert_in_delta 0.8, @small.pattern.hamming_similar('testa'), D
  end

  def test_long
    assert_in_delta 1.0, @long.similar(@long.pattern), D
  end
end
@@ -0,0 +1,29 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + "/../ext/amatch"
3
+
4
# Checks Amatch::Jaro#match, with and without case folding.
class TestJaro < Test::Unit::TestCase
  include Amatch

  # Tolerance for the floating point comparisons.
  D = 0.0005

  def setup
    @martha = Jaro.new('Martha')
    @dwayne = Jaro.new('dwayne')
    @dixon  = Jaro.new('DIXON')
    @one    = Jaro.new('one')
  end

  def test_case
    # [ignore_case setting, expected score for 'MARHTA']
    [[true, 0.944], [false, 0.444]].each do |flag, expected|
      @martha.ignore_case = flag
      assert_in_delta expected, @martha.match('MARHTA'), D
    end
  end

  def test_match
    # [expected score, matcher, text]
    [
      [0.944, @martha, 'MARHTA'],
      [0.822, @dwayne, 'DUANE'],
      [0.767, @dixon,  'DICKSONX'],
      [0.667, @one,    'orange']
    ].each do |expected, matcher, text|
      assert_in_delta expected, matcher.match(text), D
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
# Checks Amatch::JaroWinkler#match, case folding and the scaling factor.
class TestJaroWinkler < Test::Unit::TestCase
  include Amatch

  # Tolerance for the floating point comparisons.
  D = 0.0005

  def setup
    @martha = JaroWinkler.new('Martha')
    @dwayne = JaroWinkler.new('dwayne')
    @dixon  = JaroWinkler.new('DIXON')
    @one    = JaroWinkler.new("one")
  end

  def test_case
    # [ignore_case setting, expected score for 'MARHTA']
    [[true, 0.961], [false, 0.500]].each do |flag, expected|
      @martha.ignore_case = flag
      assert_in_delta expected, @martha.match('MARHTA'), D
    end
  end

  def test_match
    # [expected score, matcher, text]
    [
      [0.961, @martha, 'MARHTA'],
      [0.840, @dwayne, 'DUANE'],
      [0.813, @dixon,  'DICKSONX'],
      [0,     @one,    'two'],
      [0.700, @one,    'orange']
    ].each do |expected, matcher, text|
      assert_in_delta expected, matcher.match(text), D
    end
  end

  def test_scaling_factor
    # The factor is 0.1 before anything is assigned.
    assert_in_delta 0.1, @martha.scaling_factor, 0.0000001

    @martha.scaling_factor = 0.2
    assert_in_delta 0.978, @martha.match('MARHTA'), D

    @martha.scaling_factor = 0.5 # this is far too high
    assert_in_delta 1.028, @martha.match('MARHTA'), D
  end
end
@@ -0,0 +1,83 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
# Checks Amatch::Levenshtein#match, #search, #similar and pattern assignment.
class TestLevenshtein < Test::Unit::TestCase
  include Amatch

  def setup
    @d      = 0.000001 # tolerance for the floating point comparisons
    @empty  = Levenshtein.new('')
    @simple = Levenshtein.new('test')
    @long   = Levenshtein.new('A' * 160)
  end

  def test_match
    # [expected distance, text compared against the pattern 'test']
    [
      [4, ''],
      [0, 'test'],
      [0, 'test'],
      [1, 'testa'],
      [1, 'atest'],
      [1, 'teast'],
      [1, 'est'],
      [1, 'tes'],
      [1, 'tst'],
      [1, 'best'],
      [1, 'tost'],
      [1, 'tesa'],
      [3, 'taex'],
      [6, 'aaatestbbb']
    ].each do |expected, text|
      assert_equal expected, @simple.match(text)
    end
  end

  def test_search
    # [expected distance, matcher, text searched]
    [
      [4, @simple, ''],
      [0, @empty,  ''],
      [0, @empty,  'test'],
      [0, @simple, 'aaatestbbb'],
      [3, @simple, 'aaataexbbb'],
      [4, @simple, 'aaaaaaaaa']
    ].each do |expected, matcher, text|
      assert_equal expected, matcher.search(text)
    end
  end

  def test_array_result
    # An array argument yields one result per element.
    assert_equal [2, 0], @simple.match(["tets", "test"])
    assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
    assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
  end

  def test_pattern_setting
    assert_raises(TypeError) { @simple.pattern = :something }
    assert_equal 0, @simple.match('test')

    # [new pattern, expected distance to 'test']
    [['', 4], ['test', 0]].each do |pattern, expected|
      @simple.pattern = pattern
      assert_equal expected, @simple.match('test')
    end
  end

  def test_similar
    [[1, ''], [0, 'not empty']].each do |expected, text|
      assert_in_delta expected, @empty.similar(text), @d
    end

    # [expected similarity, text compared against the pattern 'test']
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.8,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.75, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.4,  'aaatestbbb']
    ].each do |expected, text|
      assert_in_delta expected, @simple.similar(text), @d
    end

    # The same measure through the String extension method.
    assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), @d
  end

  def test_long
    assert_in_delta 1.0, @long.similar(@long.pattern), @d
  end

  def test_long2
    # Only checks that comparing these two long strings gives a truthy result.
    a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
    b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
    assert a.levenshtein_similar(b)
  end
end
@@ -0,0 +1,61 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
# Checks Amatch::LongestSubsequence#match and #similar for an empty, a short
# and a long pattern.
class TestLongestSubsequence < Test::Unit::TestCase
  include Amatch

  # Tolerance for the floating point comparisons.
  D = 0.000001

  def setup
    @empty = LongestSubsequence.new('')
    @small = LongestSubsequence.new('test')
    @long  = LongestSubsequence.new('A' * 160)
  end

  def test_empty_subsequence
    # [matcher, text]; every combination involving an empty string gives 0
    [
      [@empty, ''],
      [@empty, 'a'],
      [@small, ''],
      [@empty, 'not empty']
    ].each do |matcher, text|
      assert_equal 0, matcher.match(text)
    end
  end

  def test_small_subsequence
    # [expected subsequence length, text compared against the pattern 'test']
    [
      [4, 'test'],
      [4, 'testa'],
      [4, 'atest'],
      [4, 'teast'],
      [3, 'est'],
      [3, 'tes'],
      [3, 'tst'],
      [3, 'best'],
      [3, 'tost'],
      [3, 'tesa'],
      [2, 'taex'],
      [1, 'aaatbbb'],
      [1, 'aaasbbb'],
      [4, 'aaatestbbb']
    ].each do |expected, text|
      assert_equal expected, @small.match(text)
    end
  end

  def test_similar
    [[1, ''], [0, 'not empty']].each do |expected, text|
      assert_in_delta expected, @empty.similar(text), D
    end

    # [expected similarity, text compared against the pattern 'test']
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.8,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.75, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.50, 'taex'],
      [0.4,  'aaatestbbb']
    ].each do |expected, text|
      assert_in_delta expected, @small.similar(text), D
    end

    # The same measure through the String extension method.
    assert_in_delta 0.75, @small.pattern.longest_subsequence_similar('est'), D
  end

  def test_long
    assert_in_delta 1.0, @long.similar(@long.pattern), D
  end
end