amatch 0.2.5-x86-mswin32
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +31 -0
- data/COPYING +340 -0
- data/README +130 -0
- data/Rakefile +141 -0
- data/VERSION +1 -0
- data/amatch.gemspec +31 -0
- data/bin/agrep.rb +79 -0
- data/ext/amatch.c +1641 -0
- data/ext/common.h +25 -0
- data/ext/extconf.rb +6 -0
- data/ext/pair.c +77 -0
- data/ext/pair.h +29 -0
- data/install.rb +28 -0
- data/lib/amatch.so +0 -0
- data/lib/amatch/version.rb +8 -0
- data/tests/test_hamming.rb +58 -0
- data/tests/test_jaro.rb +29 -0
- data/tests/test_jaro_winkler.rb +38 -0
- data/tests/test_levenshtein.rb +83 -0
- data/tests/test_longest_subsequence.rb +61 -0
- data/tests/test_longest_substring.rb +61 -0
- data/tests/test_pair_distance.rb +86 -0
- data/tests/test_sellers.rb +96 -0
- metadata +95 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Exercises LongestSubstring matchers built from a short pattern ('test'),
# an empty pattern and a 160-character pattern.
class TestLongestSubstring < Test::Unit::TestCase
  include Amatch

  # Tolerance passed to assert_in_delta for every comparison in this class.
  D = 0.000001

  # Creates the three matchers used by the tests below.
  def setup
    @small = LongestSubstring.new('test')
    @empty = LongestSubstring.new('')
    @long  = LongestSubstring.new('A' * 160)
  end

  # Whenever the pattern or the compared text is empty (or the pattern is
  # empty and the text is not), match is expected to report 0.
  def test_empty_substring
    [
      [@empty, ''],
      [@empty, 'a'],
      [@small, ''],
      [@empty, 'not empty']
    ].each do |matcher, text|
      assert_in_delta 0, matcher.match(text), D
    end
  end

  # Expected match values for the pattern 'test' against assorted inputs.
  # Each row is [expected value, text to compare].
  def test_small_substring
    [
      [4, 'test'],
      [4, 'testa'],
      [4, 'atest'],
      [2, 'teast'],
      [3, 'est'],
      [3, 'tes'],
      [2, 'tst'],
      [3, 'best'],
      [2, 'tost'],
      [3, 'tesa'],
      [1, 'taex'],
      [1, 'aaatbbb'],
      [1, 'aaasbbb'],
      [4, 'aaatestbbb']
    ].each do |expected, text|
      assert_in_delta expected, @small.match(text), D
    end
  end

  # Expected similar values: first for the empty pattern, then for the
  # pattern 'test', and finally through the String-level shortcut
  # longest_substring_similar called on the pattern itself.
  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D

    [
      [0.0,  ''],
      [1.0,  'test'],
      [0.8,  'testa'],
      [0.8,  'atest'],
      [0.4,  'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.5,  'tst'],
      [0.75, 'best'],
      [0.5,  'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.4,  'aaatestbbb']
    ].each do |expected, text|
      assert_in_delta expected, @small.similar(text), D
    end

    pattern = @small.pattern
    assert_in_delta 0.75, pattern.longest_substring_similar('est'), D
  end

  # A 160-character pattern compared with itself must yield similarity 1.0.
  def test_long
    own_pattern = @long.pattern
    assert_in_delta 1.0, @long.similar(own_pattern), D
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Exercises PairDistance matchers built from several fixed patterns:
# a single word, an empty string, two country names, a comma-separated
# list and a 160-character string.
class TestPairDistance < Test::Unit::TestCase
  include Amatch

  # Tolerance passed to assert_in_delta for every comparison in this class.
  D = 0.000001

  # Creates one matcher per pattern used by the tests below.
  def setup
    @single  = PairDistance.new('test')
    @empty   = PairDistance.new('')
    @france  = PairDistance.new('republic of france')
    @germany = PairDistance.new('federal republic of germany')
    @csv     = PairDistance.new('foo,bar,baz')
    @long    = PairDistance.new('A' * 160)
  end

  # An empty pattern scores 1 against an empty string and 0 against a
  # non-empty one, for both match and similar.
  def test_empty
    assert_in_delta(1, @empty.match(''), D)
    assert_in_delta(0, @empty.match('not empty'), D)
    assert_in_delta(1, @empty.similar(''), D)
    assert_in_delta(0, @empty.similar('not empty'), D)
  end

  # Expected match values for the two country-name patterns.
  # Each row is [expected value, matcher, text to compare].
  def test_countries
    [
      [0.5555555, @france,  'france'],
      [0.1052631, @france,  'germany'],
      [0.4615384, @germany, 'germany'],
      [0.16,      @germany, 'france'],
      [0.6829268, @germany, 'german democratic republic'],
      [0.72,      @france,  'french republic'],
      [0.4375,    @germany, 'french republic'],
      [0.5294117, @france,  'german democratic republic']
    ].each do |expected, matcher, text|
      assert_in_delta expected, matcher.match(text), D
    end
  end

  # Expected match values for the pattern 'test', followed by the
  # String-level shortcut pair_distance_similar called on the pattern.
  def test_single
    [
      [0,         ''],
      [1,         'test'],
      [0.8571428, 'testa'],
      [0.8571428, 'atest'],
      [0.5714285, 'teast'],
      [0.8,       'est'],
      [0.8,       'tes'],
      [0.4,       'tst'],
      [0.6666666, 'best'],
      [0.3333333, 'tost'],
      [0.6666666, 'tesa'],
      [0.0,       'taex'],
      [0.5,       'aaatestbbb'],
      [0.6,       'aaa test bbb'],
      [0.6,       'test aaa bbb'],
      [0.6,       'bbb aaa test']
    ].each do |expected, text|
      assert_in_delta expected, @single.match(text), D
    end

    pattern = @single.pattern
    assert_in_delta 0.8571428, pattern.pair_distance_similar('atest'), D
  end

  # Expected values for the pattern 'foo,bar,baz'.
  # NOTE(review): the second argument to match (/,/ or nil here) presumably
  # selects how the strings are split into tokens -- confirm against
  # PairDistance#match. The expectations show that with /,/ the order of the
  # three fields does not affect the score, while with nil or no second
  # argument a reordered string scores 0.9.
  def test_csv
    comma = /,/
    [
      [0,   ''],
      [0.5, 'foo'],
      [0.5, 'bar'],
      [0.5, 'baz'],
      [0.8, 'foo,bar'],
      [0.8, 'bar,foo'],
      [0.8, 'bar,baz'],
      [0.8, 'baz,bar'],
      [0.8, 'foo,baz'],
      [0.8, 'baz,foo'],
      [1,   'foo,bar,baz'],
      [1,   'foo,baz,bar'],
      [1,   'baz,foo,bar'],
      [1,   'baz,bar,foo'],
      [1,   'bar,foo,baz'],
      [1,   'bar,baz,foo']
    ].each do |expected, text|
      assert_in_delta expected, @csv.match(text, comma), D
    end

    # Explicit nil, omitted argument, and similar are checked separately.
    assert_in_delta 1, @csv.match('foo,bar,baz', nil), D
    assert_in_delta 0.9, @csv.match('foo,baz,bar', nil), D
    assert_in_delta 0.9, @csv.match('foo,baz,bar'), D
    assert_in_delta 0.9, @csv.similar('foo,baz,bar'), D
  end

  # A 160-character pattern compared with itself must yield similarity 1.0.
  def test_long
    own_pattern = @long.pattern
    assert_in_delta 1.0, @long.similar(own_pattern), D
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
require 'test_levenshtein'
|
4
|
+
|
5
|
+
# Exercises Sellers matchers. The class inherits from TestLevenshtein, so
# every test defined there also runs against the Sellers objects created in
# setup below.
class TestSellers < TestLevenshtein
  include Amatch

  # Creates the matchers and the tolerance used by the assertions.
  # NOTE(review): the tolerance is an instance variable (@d) rather than a
  # constant, presumably because the inherited TestLevenshtein tests read
  # it -- confirm against test_levenshtein.rb.
  def setup
    @d = 0.000001
    @empty = Sellers.new('')
    @simple = Sellers.new('test')
    @long = Sellers.new('A' * 160)
  end

  # Checks the three edit weights (substitution, insertion, deletion).
  # All start at 1. Each scenario then makes exactly one operation cheap
  # and the other two cost 1000, and expects both match and search to
  # report the cheap weight for an input one edit away from 'test'.
  # The scenarios run once with weight 1 and once with weight 0.5.
  # Finally, reset_weights must restore all three weights to 1.
  def test_weights
    assert_in_delta 1, @simple.substitution, @d
    assert_in_delta 1, @simple.insertion, @d
    assert_in_delta 1, @simple.deletion, @d

    # Only insertion is cheap (weight 1).
    @simple.insertion = 1
    @simple.substitution = @simple.deletion = 1000
    assert_in_delta 1, @simple.match('tst'), @d
    assert_in_delta 1, @simple.search('bbbtstccc'), @d

    # Only deletion is cheap (weight 1).
    @simple.deletion = 1
    @simple.substitution = @simple.insertion = 1000
    assert_in_delta 1, @simple.match('tedst'), @d
    assert_in_delta 1, @simple.search('bbbtedstccc'), @d

    # Only substitution is cheap (weight 1).
    @simple.substitution = 1
    @simple.deletion = @simple.insertion = 1000
    assert_in_delta 1, @simple.match('tast'), @d
    assert_in_delta 1, @simple.search('bbbtastccc'), @d

    # Same three scenarios with a fractional weight of 0.5.
    @simple.insertion = 0.5
    @simple.substitution = @simple.deletion = 1000
    assert_in_delta 0.5, @simple.match('tst'), @d
    assert_in_delta 0.5, @simple.search('bbbtstccc'), @d

    @simple.deletion = 0.5
    @simple.substitution = @simple.insertion = 1000
    assert_in_delta 0.5, @simple.match('tedst'), @d
    assert_in_delta 0.5, @simple.search('bbbtedstccc'), @d

    @simple.substitution = 0.5
    @simple.deletion = @simple.insertion = 1000
    assert_in_delta 0.5, @simple.match('tast'), @d
    assert_in_delta 0.5, @simple.search('bbbtastccc'), @d

    @simple.reset_weights
    assert_in_delta 1, @simple.substitution, @d
    assert_in_delta 1, @simple.insertion, @d
    assert_in_delta 1, @simple.deletion, @d
  end

  # Assigning a non-numeric value (a Symbol here) to any weight must raise
  # TypeError.
  def test_weight_exceptions
    assert_raises(TypeError) { @simple.substitution = :something }
    assert_raises(TypeError) { @simple.insertion = :something }
    assert_raises(TypeError) { @simple.deletion = :something }
  end

  # Checks the String-level shortcut levenshtein_similar, called on the
  # matcher's own pattern.
  # This lives in its own uniquely named test on purpose: Ruby silently
  # replaces an earlier method when a later `def` reuses its name, so two
  # `test_similar` definitions in one class would leave only the last one
  # running.
  def test_sellers_pattern_levenshtein_similar
    assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), @d
  end

  # Expected similar values with the default weights: first for the empty
  # pattern, then for the pattern 'test'. The final assertion changes the
  # weights (insertion 1, substitution and deletion 2) and expects 'tst'
  # to score 0.875 instead of 0.75.
  def test_similar
    assert_in_delta 1, @empty.similar(''), @d
    assert_in_delta 0, @empty.similar('not empty'), @d
    assert_in_delta 0.0, @simple.similar(''), @d
    assert_in_delta 1.0, @simple.similar('test'), @d
    assert_in_delta 0.8, @simple.similar('testa'), @d
    assert_in_delta 0.8, @simple.similar('atest'), @d
    assert_in_delta 0.8, @simple.similar('teast'), @d
    assert_in_delta 0.75, @simple.similar('est'), @d
    assert_in_delta 0.75, @simple.similar('tes'), @d
    assert_in_delta 0.75, @simple.similar('tst'), @d
    assert_in_delta 0.75, @simple.similar('best'), @d
    assert_in_delta 0.75, @simple.similar('tost'), @d
    assert_in_delta 0.75, @simple.similar('tesa'), @d
    assert_in_delta 0.25, @simple.similar('taex'), @d
    assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
    @simple.insertion = 1
    @simple.substitution = @simple.deletion = 2
    assert_in_delta 0.875, @simple.similar('tst'), @d
  end

  # A 160-character pattern compared with itself must yield similarity 1.0.
  def test_long
    assert_in_delta 1.0, @long.similar(@long.pattern), @d
  end
end
|
metadata
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: amatch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.5
|
5
|
+
platform: x86-mswin32
|
6
|
+
authors:
|
7
|
+
- Florian Frank
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-09-25 00:00:00 +02:00
|
13
|
+
default_executable: agrep.rb
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: |
|
17
|
+
Amatch is a library for approximate string matching and searching in strings.
|
18
|
+
Several algorithms can be used to do this, and it's also possible to compute a
|
19
|
+
similarity metric number between 0.0 and 1.0 for two given strings.
|
20
|
+
|
21
|
+
email: flori@ping.de
|
22
|
+
executables:
|
23
|
+
- agrep.rb
|
24
|
+
extensions: []
|
25
|
+
|
26
|
+
extra_rdoc_files:
|
27
|
+
- README
|
28
|
+
- ext/amatch.c
|
29
|
+
- lib/amatch/version.rb
|
30
|
+
files:
|
31
|
+
- CHANGES
|
32
|
+
- COPYING
|
33
|
+
- README
|
34
|
+
- Rakefile
|
35
|
+
- VERSION
|
36
|
+
- amatch.gemspec
|
37
|
+
- bin/agrep.rb
|
38
|
+
- ext/amatch.c
|
39
|
+
- ext/common.h
|
40
|
+
- ext/extconf.rb
|
41
|
+
- ext/pair.c
|
42
|
+
- ext/pair.h
|
43
|
+
- install.rb
|
44
|
+
- lib/amatch/version.rb
|
45
|
+
- tests/test_hamming.rb
|
46
|
+
- tests/test_jaro.rb
|
47
|
+
- tests/test_jaro_winkler.rb
|
48
|
+
- tests/test_levenshtein.rb
|
49
|
+
- tests/test_longest_subsequence.rb
|
50
|
+
- tests/test_longest_substring.rb
|
51
|
+
- tests/test_pair_distance.rb
|
52
|
+
- tests/test_sellers.rb
|
53
|
+
- lib/amatch.so
|
54
|
+
has_rdoc: true
|
55
|
+
homepage: http://amatch.rubyforge.org
|
56
|
+
licenses: []
|
57
|
+
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options:
|
60
|
+
- --main
|
61
|
+
- README
|
62
|
+
- --title
|
63
|
+
- amatch - Approximate Matching
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
- ext
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: "0"
|
73
|
+
version:
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: "0"
|
79
|
+
version:
|
80
|
+
requirements: []
|
81
|
+
|
82
|
+
rubyforge_project: amatch
|
83
|
+
rubygems_version: 1.3.4
|
84
|
+
signing_key:
|
85
|
+
specification_version: 3
|
86
|
+
summary: Approximate String Matching library
|
87
|
+
test_files:
|
88
|
+
- tests/test_longest_substring.rb
|
89
|
+
- tests/test_hamming.rb
|
90
|
+
- tests/test_longest_subsequence.rb
|
91
|
+
- tests/test_pair_distance.rb
|
92
|
+
- tests/test_levenshtein.rb
|
93
|
+
- tests/test_jaro.rb
|
94
|
+
- tests/test_sellers.rb
|
95
|
+
- tests/test_jaro_winkler.rb
|