amatch-rbx 0.2.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.travis.yml +13 -0
- data/CHANGES +50 -0
- data/COPYING +340 -0
- data/Gemfile +5 -0
- data/README.rdoc +138 -0
- data/Rakefile +35 -0
- data/VERSION +1 -0
- data/amatch.gemspec +0 -0
- data/bin/agrep.rb +79 -0
- data/ext/amatch_ext.c +1661 -0
- data/ext/common.h +25 -0
- data/ext/extconf.rb +14 -0
- data/ext/pair.c +77 -0
- data/ext/pair.h +29 -0
- data/install.rb +28 -0
- data/lib/amatch.rb +3 -0
- data/lib/amatch/.keep +0 -0
- data/lib/amatch/polite.rb +7 -0
- data/lib/amatch/rude.rb +7 -0
- data/lib/amatch/version.rb +8 -0
- data/tests/test_hamming.rb +58 -0
- data/tests/test_jaro.rb +29 -0
- data/tests/test_jaro_winkler.rb +38 -0
- data/tests/test_levenshtein.rb +86 -0
- data/tests/test_longest_subsequence.rb +61 -0
- data/tests/test_longest_substring.rb +61 -0
- data/tests/test_pair_distance.rb +92 -0
- data/tests/test_sellers.rb +95 -0
- metadata +173 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Test case for Amatch::LongestSubstring. Each test feeds fixed inputs to
# #match or #similar and compares the result with a hard-coded expectation.
class TestLongestSubstring < Test::Unit::TestCase
  include Amatch

  # Tolerance passed to every assert_in_delta call in this test case.
  D = 0.000001

  # Creates matchers for the pattern 'test', the empty pattern and a pattern
  # made of 160 'A' characters.
  def setup
    @small = LongestSubstring.new('test')
    @empty = LongestSubstring.new('')
    @long = LongestSubstring.new('A' * 160)
  end

  # #match is expected to be 0 whenever the pattern or the input is empty.
  def test_empty_substring
    [
      [@empty, ''],
      [@empty, 'a'],
      [@small, ''],
      [@empty, 'not empty']
    ].each do |matcher, input|
      assert_in_delta 0, matcher.match(input), D
    end
  end

  # Expected #match results for the pattern 'test' against various inputs.
  def test_small_substring
    [
      [4, 'test'],
      [4, 'testa'],
      [4, 'atest'],
      [2, 'teast'],
      [3, 'est'],
      [3, 'tes'],
      [2, 'tst'],
      [3, 'best'],
      [2, 'tost'],
      [3, 'tesa'],
      [1, 'taex'],
      [1, 'aaatbbb'],
      [1, 'aaasbbb'],
      [4, 'aaatestbbb']
    ].each do |expected, input|
      assert_in_delta expected, @small.match(input), D
    end
  end

  # Expected #similar results, first for the empty pattern, then for 'test',
  # and finally through the String#longest_substring_similar shortcut.
  def test_similar
    assert_in_delta 1, @empty.similar(''), D
    assert_in_delta 0, @empty.similar('not empty'), D

    [
      [0.0, ''],
      [1.0, 'test'],
      [0.8, 'testa'],
      [0.8, 'atest'],
      [0.4, 'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.5, 'tst'],
      [0.75, 'best'],
      [0.5, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.4, 'aaatestbbb']
    ].each do |expected, input|
      assert_in_delta expected, @small.similar(input), D
    end

    pattern = @small.pattern
    assert_in_delta 0.75, pattern.longest_substring_similar('est'), D
  end

  # A 160 character pattern compared with itself must score 1.0.
  def test_long
    own_pattern = @long.pattern
    assert_in_delta 1.0, @long.similar(own_pattern), D
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Test case for Amatch::PairDistance. Each test feeds fixed inputs to #match
# or #similar and compares the result with a hard-coded expectation.
class TestPairDistance < Test::Unit::TestCase
  include Amatch

  # Tolerance passed to every assert_in_delta call in this test case.
  D = 0.000001

  # Creates one matcher per pattern used by the tests below.
  def setup
    @single = PairDistance.new('test')
    @empty = PairDistance.new('')
    @france = PairDistance.new('republic of france')
    @germany = PairDistance.new('federal republic of germany')
    @csv = PairDistance.new('foo,bar,baz')
    @long = PairDistance.new('A' * 160)
  end

  # DiceCoefficient must refer to the very same class as PairDistance.
  def test_alternative_constant
    assert_equal PairDistance, DiceCoefficient
  end

  # The empty pattern scores 1 against '' and 0 against a non-empty string,
  # for both #match and #similar.
  def test_empty
    %i[match similar].each do |method_name|
      [[1, ''], [0, 'not empty']].each do |expected, input|
        assert_in_delta expected, @empty.public_send(method_name, input), D
      end
    end
  end

  # Expected #match results for the two multi-word country patterns.
  def test_countries
    [
      [0.5555555, @france, 'france'],
      [0.1052631, @france, 'germany'],
      [0.4615384, @germany, 'germany'],
      [0.16, @germany, 'france'],
      [0.6829268, @germany, 'german democratic republic'],
      [0.72, @france, 'french republic'],
      [0.4375, @germany, 'french republic'],
      [0.5294117, @france, 'german democratic republic']
    ].each do |expected, matcher, input|
      assert_in_delta expected, matcher.match(input), D
    end
  end

  # Expected #match results for the pattern 'test', followed by checks of the
  # String#pair_distance_similar shortcut with and without a tokenizer regexp.
  def test_single
    [
      [0, ''],
      [1, 'test'],
      [0.8571428, 'testa'],
      [0.8571428, 'atest'],
      [0.5714285, 'teast'],
      [0.8, 'est'],
      [0.8, 'tes'],
      [0.4, 'tst'],
      [0.6666666, 'best'],
      [0.3333333, 'tost'],
      [0.6666666, 'tesa'],
      [0.0, 'taex'],
      [0.5, 'aaatestbbb'],
      [0.6, 'aaa test bbb'],
      [0.6, 'test aaa bbb'],
      [0.6, 'bbb aaa test']
    ].each do |expected, input|
      assert_in_delta expected, @single.match(input), D
    end

    assert_in_delta 0.8571428, @single.pattern.pair_distance_similar('atest'), D

    france_pattern = @france.pattern
    assert_in_delta 1.0, france_pattern.pair_distance_similar('of france, republic', /[, ]+/), D
    assert_in_delta 0.9230769, france_pattern.pair_distance_similar('of france, republik', /[, ]+/), D
  end

  # Expected results for the comma separated pattern: first with /,/ as the
  # second argument, then with nil, then with the argument omitted.
  def test_csv
    [
      [0, ''],
      [0.5, 'foo'],
      [0.5, 'bar'],
      [0.5, 'baz'],
      [0.8, 'foo,bar'],
      [0.8, 'bar,foo'],
      [0.8, 'bar,baz'],
      [0.8, 'baz,bar'],
      [0.8, 'foo,baz'],
      [0.8, 'baz,foo'],
      [1, 'foo,bar,baz'],
      [1, 'foo,baz,bar'],
      [1, 'baz,foo,bar'],
      [1, 'baz,bar,foo'],
      [1, 'bar,foo,baz'],
      [1, 'bar,baz,foo']
    ].each do |expected, input|
      assert_in_delta expected, @csv.match(input, /,/), D
    end

    [
      [1, 'foo,bar,baz'],
      [0.9, 'foo,baz,bar']
    ].each do |expected, input|
      assert_in_delta expected, @csv.match(input, nil), D
    end

    reordered = 'foo,baz,bar'
    assert_in_delta 0.9, @csv.match(reordered), D
    assert_in_delta 0.9, @csv.similar(reordered), D
  end

  # A 160 character pattern compared with itself must score 1.0.
  def test_long
    own_pattern = @long.pattern
    assert_in_delta 1.0, @long.similar(own_pattern), D
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'amatch'
|
3
|
+
|
4
|
+
# Test case for Amatch::Sellers. Covers the weight accessors, their type
# checking, and #match / #search / #similar against hard-coded expectations.
class TestSellers < Test::Unit::TestCase
  include Amatch

  # Stores the comparison tolerance and creates matchers for the empty
  # pattern, the pattern 'test' and a pattern made of 160 'A' characters.
  def setup
    @d = 0.000001
    @empty = Sellers.new('')
    @simple = Sellers.new('test')
    @long = Sellers.new('A' * 160)
  end

  # Checks the initial weights, then for each cost (1 and 0.5) sets one
  # weight to that cost and the other two to 1000 before matching a string
  # whose expected result equals the cost. Finally verifies that
  # #reset_weights restores all three weights to 1.
  def test_weights
    assert_unit_weights

    # Each row: weight set to the cost, the two weights set to 1000 (in the
    # order their setters are invoked), the #match input, the #search input.
    scenarios = [
      [:insertion, :deletion, :substitution, 'tst', 'bbbtstccc'],
      [:deletion, :insertion, :substitution, 'tedst', 'bbbtedstccc'],
      [:substitution, :insertion, :deletion, 'tast', 'bbbtastccc']
    ]

    [1, 0.5].each do |cost|
      scenarios.each do |cheap, costly_a, costly_b, match_input, search_input|
        @simple.public_send("#{cheap}=", cost)
        @simple.public_send("#{costly_a}=", 1000)
        @simple.public_send("#{costly_b}=", 1000)
        assert_in_delta cost, @simple.match(match_input), @d
        assert_in_delta cost, @simple.search(search_input), @d
      end
    end

    @simple.reset_weights
    assert_unit_weights
  end

  # Assigning a Symbol to any weight must raise TypeError.
  def test_weight_exceptions
    %i[substitution insertion deletion].each do |weight|
      assert_raises(TypeError) { @simple.public_send("#{weight}=", :something) }
    end
  end

  # Expected #similar results for 'test' with the weights left untouched,
  # plus one check of the String#levenshtein_similar shortcut.
  def test_similar
    assert_untouched_weight_similarities
    assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), @d
  end

  # Same expectations as test_similar, preceded by the empty pattern cases
  # and followed by one check after changing the weights.
  def test_similar2
    assert_in_delta 1, @empty.similar(''), @d
    assert_in_delta 0, @empty.similar('not empty'), @d

    assert_untouched_weight_similarities

    @simple.insertion = 1
    @simple.deletion = 2
    @simple.substitution = 2
    assert_in_delta 0.875, @simple.similar('tst'), @d
  end

  # A 160 character pattern compared with itself must score 1.0.
  def test_long
    own_pattern = @long.pattern
    assert_in_delta 1.0, @long.similar(own_pattern), @d
  end

  private

  # Asserts that substitution, insertion and deletion all equal 1.
  def assert_unit_weights
    %i[substitution insertion deletion].each do |weight|
      assert_in_delta 1, @simple.public_send(weight), @d
    end
  end

  # Asserts the #similar expectations shared by test_similar and
  # test_similar2 for the pattern 'test'.
  def assert_untouched_weight_similarities
    [
      [0.0, ''],
      [1.0, 'test'],
      [0.8, 'testa'],
      [0.8, 'atest'],
      [0.8, 'teast'],
      [0.75, 'est'],
      [0.75, 'tes'],
      [0.75, 'tst'],
      [0.75, 'best'],
      [0.75, 'tost'],
      [0.75, 'tesa'],
      [0.25, 'taex'],
      [0.4, 'aaatestbbb']
    ].each do |expected, input|
      assert_in_delta expected, @simple.similar(input), @d
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: amatch-rbx
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.12
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Florian Frank
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: gem_hadar
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.1.8
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.1.8
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: test-unit
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.3'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: utils
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "<"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '11.0'
|
62
|
+
- - "~>"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '10'
|
65
|
+
type: :development
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - "<"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '11.0'
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '10'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: tins
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0.3'
|
82
|
+
type: :runtime
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0.3'
|
89
|
+
description: |
|
90
|
+
Amatch is a library for approximate string matching and searching in strings.
|
91
|
+
Several algorithms can be used to do this, and it's also possible to compute a
|
92
|
+
similarity metric number between 0.0 and 1.0 for two given strings.
|
93
|
+
This is a workaround version to support Rubinius installation.
|
94
|
+
email: flori@ping.de
|
95
|
+
executables:
|
96
|
+
- agrep.rb
|
97
|
+
extensions:
|
98
|
+
- ext/extconf.rb
|
99
|
+
extra_rdoc_files:
|
100
|
+
- README.rdoc
|
101
|
+
- lib/amatch.rb
|
102
|
+
- lib/amatch/polite.rb
|
103
|
+
- lib/amatch/rude.rb
|
104
|
+
- lib/amatch/version.rb
|
105
|
+
- ext/amatch_ext.c
|
106
|
+
- ext/pair.c
|
107
|
+
files:
|
108
|
+
- ".gitignore"
|
109
|
+
- ".travis.yml"
|
110
|
+
- CHANGES
|
111
|
+
- COPYING
|
112
|
+
- Gemfile
|
113
|
+
- README.rdoc
|
114
|
+
- Rakefile
|
115
|
+
- VERSION
|
116
|
+
- amatch.gemspec
|
117
|
+
- bin/agrep.rb
|
118
|
+
- ext/amatch_ext.c
|
119
|
+
- ext/common.h
|
120
|
+
- ext/extconf.rb
|
121
|
+
- ext/pair.c
|
122
|
+
- ext/pair.h
|
123
|
+
- install.rb
|
124
|
+
- lib/amatch.rb
|
125
|
+
- lib/amatch/.keep
|
126
|
+
- lib/amatch/polite.rb
|
127
|
+
- lib/amatch/rude.rb
|
128
|
+
- lib/amatch/version.rb
|
129
|
+
- tests/test_hamming.rb
|
130
|
+
- tests/test_jaro.rb
|
131
|
+
- tests/test_jaro_winkler.rb
|
132
|
+
- tests/test_levenshtein.rb
|
133
|
+
- tests/test_longest_subsequence.rb
|
134
|
+
- tests/test_longest_substring.rb
|
135
|
+
- tests/test_pair_distance.rb
|
136
|
+
- tests/test_sellers.rb
|
137
|
+
homepage: http://github.com/flori/amatch
|
138
|
+
licenses: []
|
139
|
+
metadata: {}
|
140
|
+
post_install_message:
|
141
|
+
rdoc_options:
|
142
|
+
- "--title"
|
143
|
+
- Amatch - Approximate Matching
|
144
|
+
- "--main"
|
145
|
+
- README.rdoc
|
146
|
+
require_paths:
|
147
|
+
- lib
|
148
|
+
- ext
|
149
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
requirements: []
|
160
|
+
rubyforge_project:
|
161
|
+
rubygems_version: 2.2.1
|
162
|
+
signing_key:
|
163
|
+
specification_version: 4
|
164
|
+
summary: Approximate String Matching library
|
165
|
+
test_files:
|
166
|
+
- tests/test_hamming.rb
|
167
|
+
- tests/test_jaro.rb
|
168
|
+
- tests/test_jaro_winkler.rb
|
169
|
+
- tests/test_levenshtein.rb
|
170
|
+
- tests/test_longest_subsequence.rb
|
171
|
+
- tests/test_longest_substring.rb
|
172
|
+
- tests/test_pair_distance.rb
|
173
|
+
- tests/test_sellers.rb
|