amatch 0.2.5-x86-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
/*
 * common.h - fallback definitions for Ruby C API accessor macros.
 *
 * Each macro is defined only when the Ruby headers included before this
 * file have not already provided it; the fallback reads the corresponding
 * struct member directly.
 *
 * The include guard uses the NAME_H_INCLUDED form (as pair.h does) and
 * avoids a leading double underscore, which the C standard reserves for
 * the implementation.
 */
#ifndef COMMON_H_INCLUDED
#define COMMON_H_INCLUDED

/* Pointer to the character data of a Ruby String. */
#ifndef RSTRING_PTR
#define RSTRING_PTR(str) (RSTRING(str)->ptr)
#endif

/* Length of a Ruby String. */
#ifndef RSTRING_LEN
#define RSTRING_LEN(str) (RSTRING(str)->len)
#endif

/* Pointer to the element storage of a Ruby Array. */
#ifndef RARRAY_PTR
#define RARRAY_PTR(ary) (RARRAY(ary)->ptr)
#endif

/* Number of elements in a Ruby Array. */
#ifndef RARRAY_LEN
#define RARRAY_LEN(ary) (RARRAY(ary)->len)
#endif

/* Value stored in a Ruby Float. */
#ifndef RFLOAT_VALUE
#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
#endif

#endif /* COMMON_H_INCLUDED */
@@ -0,0 +1,6 @@
# Generates the Makefile for the amatch C extension.
require 'mkmf'
require 'rbconfig'

# When the configured compiler is exactly "gcc", enable all warnings.
CONFIG['CC'] = 'gcc -Wall ' if CONFIG['CC'] == 'gcc'

create_makefile('amatch')
@@ -0,0 +1,77 @@
1
+ #include "pair.h"
2
+
3
+ #define DEBUG 0
4
+
5
+ static int predict_length(VALUE tokens)
6
+ {
7
+ int i, l, result;
8
+ for (i = 0, result = 0; i < RARRAY_LEN(tokens); i++) {
9
+ VALUE t = rb_ary_entry(tokens, i);
10
+ l = RSTRING_LEN(t) - 1;
11
+ if (l > 0) result += l;
12
+ }
13
+ return result;
14
+ }
15
+
16
+ PairArray *PairArray_new(VALUE tokens)
17
+ {
18
+ int i, j, k, len = predict_length(tokens);
19
+ PairArray *pair_array = ALLOC(PairArray);
20
+ Pair *pairs = ALLOC_N(Pair, len);
21
+ MEMZERO(pairs, Pair, len);
22
+ pair_array->pairs = pairs;
23
+ pair_array->len = len;
24
+ for (i = 0, k = 0; i < RARRAY_LEN(tokens); i++) {
25
+ VALUE t = rb_ary_entry(tokens, i);
26
+ char *string = RSTRING_PTR(t);
27
+ for (j = 0; j < RSTRING_LEN(t) - 1; j++) {
28
+ pairs[k].fst = string[j];
29
+ pairs[k].snd = string[j + 1];
30
+ pairs[k].status = PAIR_ACTIVE;
31
+ k++;
32
+ }
33
+ }
34
+ return pair_array;
35
+ }
36
+
37
+ void pair_array_reactivate(PairArray *self)
38
+ {
39
+ int i;
40
+ for (i = 0; i < self->len; i++) {
41
+ self->pairs[i].status = PAIR_ACTIVE;
42
+ }
43
+ }
44
+
45
+ double pair_array_match(PairArray *self, PairArray *other)
46
+ {
47
+ int i, j, matches = 0;
48
+ int sum = self->len + other->len;
49
+ if (sum == 0) return 1.0;
50
+ for (i = 0; i < self->len; i++) {
51
+ for (j = 0; j < other->len; j++) {
52
+ #if DEBUG
53
+ pair_print(self->pairs[i]);
54
+ putc(' ', stdout);
55
+ pair_print(other->pairs[j]);
56
+ printf(" -> %d\n", pair_equal(self->pairs[i], other->pairs[j]));
57
+ #endif
58
+ if (pair_equal(self->pairs[i], other->pairs[j])) {
59
+ matches++;
60
+ other->pairs[j].status = PAIR_INACTIVE;
61
+ break;
62
+ }
63
+ }
64
+ }
65
+ return ((double) (2 * matches)) / sum;
66
+ }
67
+
68
+ void pair_print(Pair pair)
69
+ {
70
+ printf("%c%c (%d)", pair.fst, pair.snd, pair.status);
71
+ }
72
+
73
+ void pair_array_destroy(PairArray *pair_array)
74
+ {
75
+ free(pair_array->pairs);
76
+ free(pair_array);
77
+ }
@@ -0,0 +1,29 @@
1
+ #ifndef PAIR_H_INCLUDED
2
+ #define PAIR_H_INCLUDED
3
+
4
+ #include "ruby.h"
5
+ #include "common.h"
6
+
7
+ enum { PAIR_ACTIVE = 1, PAIR_INACTIVE = 2 };
8
+
9
+ typedef struct PairStruct {
10
+ char fst;
11
+ char snd;
12
+ char status;
13
+ char __align;
14
+ } Pair;
15
+
16
+ typedef struct PairArrayStruct {
17
+ Pair *pairs;
18
+ int len;
19
+ } PairArray;
20
+
21
+ PairArray *PairArray_new(VALUE tokens);
22
+ #define pair_equal(a, b) \
23
+ ((a).fst == (b).fst && (a).snd == (b).snd && ((a).status & (b).status & PAIR_ACTIVE))
24
+ double pair_array_match(PairArray *self, PairArray *other);
25
+ void pair_array_destroy(PairArray *pair_array);
26
+ void pair_print(Pair pair);
27
+ void pair_array_reactivate(PairArray *self);
28
+
29
+ #endif
@@ -0,0 +1,28 @@
#!/usr/bin/env ruby
# Builds the amatch C extension in ext/ and copies the built extension, the
# agrep.rb script and version.rb into the directories reported by RbConfig.

require 'rbconfig'
# RbConfig supersedes the old top-level Config module, which current Ruby
# releases no longer define.
include RbConfig
require 'fileutils'
include FileUtils::Verbose

# The make program: $MAKE when set, otherwise the first of gmake/make that
# can be run successfully with -v.
MAKE = ENV['MAKE'] || %w[gmake make].find { |c| system(c, '-v') }

bindir  = CONFIG['bindir']
archdir = CONFIG['sitearchdir']
libdir  = CONFIG['sitelibdir']
dlext   = CONFIG['DLEXT']

# Configure and build the extension, then install the shared object.
cd 'ext' do
  system 'ruby extconf.rb' or exit 1
  system "#{MAKE}" or exit 1
  mkdir_p archdir
  install "amatch.#{dlext}", archdir
end

# Install the command line script.
cd 'bin' do
  install('agrep.rb', bindir)
end

# Install the pure Ruby part of the library.
cd 'lib/amatch' do
  mkdir_p d = File.join(libdir, 'amatch')
  install 'version.rb', d
end
warn " *** Installed amatch extension."
Binary file
@@ -0,0 +1,8 @@
module Amatch
  # Amatch version
  # The string and the derived array are frozen so that the shared constants
  # cannot be modified in place.
  VERSION = '0.2.5'.freeze
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i }.freeze # :nodoc:
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
  VERSION_BUILD = VERSION_ARRAY[2] # :nodoc:
end
@@ -0,0 +1,58 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
# Unit tests for Amatch::Hamming: #match, #similar and the
# String#hamming_similar shortcut.
class TestHamming < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = Hamming.new('test')
    @empty = Hamming.new('')
    @long  = Hamming.new('A' * 160)
  end

  def test_empty
    [[0, ''], [9, 'not empty']].each do |expected, input|
      assert_in_delta expected, @empty.match(input), D
    end
    [[1, ''], [0, 'not empty']].each do |expected, input|
      assert_in_delta expected, @empty.similar(input), D
    end
  end

  def test_small_match
    [
      [4, ''],
      [0, 'test'],
      [1, 'testa'],
      [5, 'atest'],
      [3, 'teast'],
      [4, 'est'],
      [1, 'tes'],
      [3, 'tst'],
      [1, 'best'],
      [1, 'tost'],
      [1, 'tesa'],
      [3, 'taex'],
      [9, 'aaatestbbb']
    ].each do |expected, input|
      assert_in_delta expected, @small.match(input), D
    end
  end

  def test_small_similar
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.0,  'atest'],
      [0.4,  'teast'],
      [0,    'est'],
      [0.75, 'tes'],
      [0.25, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.1,  'aaatestbbb']
    ].each do |expected, input|
      assert_in_delta expected, @small.similar(input), D
    end
    # The same measure, called on the pattern string itself.
    assert_in_delta 0.8, @small.pattern.hamming_similar('testa'), D
  end

  def test_long
    assert_in_delta 1.0, @long.similar(@long.pattern), D
  end
end
@@ -0,0 +1,29 @@
require 'test/unit'
# Load amatch through the load path, exactly as the other test files do,
# instead of reaching into a relative ext/ build directory that only exists
# after an in-tree build.
require 'amatch'

# Unit tests for Amatch::Jaro.
class TestJaro < Test::Unit::TestCase
  include Amatch

  # Tolerance for comparing against the three-decimal reference values.
  D = 0.0005

  def setup
    @martha = Jaro.new('Martha')
    @dwayne = Jaro.new('dwayne')
    @dixon = Jaro.new('DIXON')
    @one = Jaro.new('one')
  end

  # Toggling ignore_case changes the score for the differently-cased input.
  def test_case
    @martha.ignore_case = true
    assert_in_delta 0.944, @martha.match('MARHTA'), D
    @martha.ignore_case = false
    assert_in_delta 0.444, @martha.match('MARHTA'), D
  end

  # Scores with ignore_case left untouched; the 'MARHTA' expectation equals
  # the ignore_case = true value asserted in test_case.
  def test_match
    assert_in_delta 0.944, @martha.match('MARHTA'), D
    assert_in_delta 0.822, @dwayne.match('DUANE'), D
    assert_in_delta 0.767, @dixon.match('DICKSONX'), D
    assert_in_delta 0.667, @one.match('orange'), D
  end
end
@@ -0,0 +1,38 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
# Unit tests for Amatch::JaroWinkler: case handling, reference scores and
# the scaling_factor accessor.
class TestJaroWinkler < Test::Unit::TestCase
  include Amatch

  # Tolerance for comparing against the three-decimal reference values.
  D = 0.0005

  def setup
    @martha = JaroWinkler.new('Martha')
    @dwayne = JaroWinkler.new('dwayne')
    @dixon  = JaroWinkler.new('DIXON')
    @one    = JaroWinkler.new("one")
  end

  def test_case
    [[true, 0.961], [false, 0.500]].each do |flag, expected|
      @martha.ignore_case = flag
      assert_in_delta expected, @martha.match('MARHTA'), D
    end
  end

  def test_match
    [
      [0.961, @martha, 'MARHTA'],
      [0.840, @dwayne, 'DUANE'],
      [0.813, @dixon,  'DICKSONX'],
      [0,     @one,    'two'],
      [0.700, @one,    'orange']
    ].each do |expected, matcher, input|
      assert_in_delta expected, matcher.match(input), D
    end
  end

  def test_scaling_factor
    assert_in_delta 0.1, @martha.scaling_factor, 0.0000001
    # 0.5 is far too high a factor: the expected score then exceeds 1.
    [[0.2, 0.978], [0.5, 1.028]].each do |factor, expected|
      @martha.scaling_factor = factor
      assert_in_delta expected, @martha.match('MARHTA'), D
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
# Unit tests for Amatch::Levenshtein: #match, #search, #similar, array
# arguments, pattern reassignment and String#levenshtein_similar.
class TestLevenshtein < Test::Unit::TestCase
  include Amatch

  def setup
    # Tolerance used for all floating point comparisons.
    @d = 0.000001
    @empty  = Levenshtein.new('')
    @simple = Levenshtein.new('test')
    @long   = Levenshtein.new('A' * 160)
  end

  def test_match
    [
      [4, ''],
      [0, 'test'],
      [0, 'test'],
      [1, 'testa'],
      [1, 'atest'],
      [1, 'teast'],
      [1, 'est'],
      [1, 'tes'],
      [1, 'tst'],
      [1, 'best'],
      [1, 'tost'],
      [1, 'tesa'],
      [3, 'taex'],
      [6, 'aaatestbbb']
    ].each do |expected, input|
      assert_equal expected, @simple.match(input)
    end
  end

  def test_search
    [
      [4, @simple, ''],
      [0, @empty,  ''],
      [0, @empty,  'test'],
      [0, @simple, 'aaatestbbb'],
      [3, @simple, 'aaataexbbb'],
      [4, @simple, 'aaaaaaaaa']
    ].each do |expected, matcher, input|
      assert_equal expected, matcher.search(input)
    end
  end

  # An array argument yields an array of results; non-string elements raise.
  def test_array_result
    assert_equal [2, 0], @simple.match(["tets", "test"])
    assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
    assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
  end

  def test_pattern_setting
    assert_raises(TypeError) { @simple.pattern = :something }
    assert_equal 0, @simple.match('test')
    [['', 4], ['test', 0]].each do |pattern, expected|
      @simple.pattern = pattern
      assert_equal expected, @simple.match('test')
    end
  end

  def test_similar
    [[1, ''], [0, 'not empty']].each do |expected, input|
      assert_in_delta expected, @empty.similar(input), @d
    end
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.8,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.75, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.4,  'aaatestbbb']
    ].each do |expected, input|
      assert_in_delta expected, @simple.similar(input), @d
    end
    # The same measure, called on the pattern string itself.
    assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), @d
  end

  def test_long
    assert_in_delta 1.0, @long.similar(@long.pattern), @d
  end

  # Two longer multi-line strings; only checks that a truthy value comes back.
  def test_long2
    a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
    b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
    assert a.levenshtein_similar(b)
  end
end
@@ -0,0 +1,61 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
# Unit tests for Amatch::LongestSubsequence: #match, #similar and
# String#longest_subsequence_similar.
class TestLongestSubsequence < Test::Unit::TestCase
  include Amatch

  # Tolerance used for all floating point comparisons.
  D = 0.000001

  def setup
    @small = LongestSubsequence.new('test')
    @empty = LongestSubsequence.new('')
    @long  = LongestSubsequence.new('A' * 160)
  end

  def test_empty_subsequence
    [
      [@empty, ''],
      [@empty, 'a'],
      [@small, ''],
      [@empty, 'not empty']
    ].each do |matcher, input|
      assert_equal 0, matcher.match(input)
    end
  end

  def test_small_subsequence
    [
      [4, 'test'],
      [4, 'testa'],
      [4, 'atest'],
      [4, 'teast'],
      [3, 'est'],
      [3, 'tes'],
      [3, 'tst'],
      [3, 'best'],
      [3, 'tost'],
      [3, 'tesa'],
      [2, 'taex'],
      [1, 'aaatbbb'],
      [1, 'aaasbbb'],
      [4, 'aaatestbbb']
    ].each do |expected, input|
      assert_equal expected, @small.match(input)
    end
  end

  def test_similar
    [[1, ''], [0, 'not empty']].each do |expected, input|
      assert_in_delta expected, @empty.similar(input), D
    end
    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.8,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.75, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.50, 'taex'],
      [0.4,  'aaatestbbb']
    ].each do |expected, input|
      assert_in_delta expected, @small.similar(input), D
    end
    # The same measure, called on the pattern string itself.
    assert_in_delta 0.75, @small.pattern.longest_subsequence_similar('est'), D
  end

  def test_long
    assert_in_delta 1.0, @long.similar(@long.pattern), D
  end
end