amatch 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +3 -0
- data/VERSION +1 -1
- data/ext/amatch.c +7 -7
- data/tests/test_hamming.rb +5 -0
- data/tests/test_levenshtein.rb +7 -2
- data/tests/test_longest_subsequence.rb +6 -1
- data/tests/test_longest_substring.rb +6 -1
- data/tests/test_pair_distance.rb +6 -0
- data/tests/test_sellers.rb +7 -3
- metadata +15 -17
- data/InstalledFiles +0 -5
- data/config.save +0 -12
- data/ext/amatch.bundle +0 -0
- data/ext/tags +0 -24
data/CHANGES
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.2.1
|
data/ext/amatch.c
CHANGED
@@ -157,7 +157,7 @@ VALUE function(VALUE self, VALUE value) \
|
|
157
157
|
|
158
158
|
typedef struct GeneralStruct {
|
159
159
|
char *pattern;
|
160
|
-
|
160
|
+
int pattern_len;
|
161
161
|
} General;
|
162
162
|
|
163
163
|
DEF_ALLOCATOR(General)
|
@@ -166,7 +166,7 @@ DEF_ITERATE_STRINGS(General)
|
|
166
166
|
|
167
167
|
typedef struct SellersStruct {
|
168
168
|
char *pattern;
|
169
|
-
|
169
|
+
int pattern_len;
|
170
170
|
double substitution;
|
171
171
|
double deletion;
|
172
172
|
double insertion;
|
@@ -185,7 +185,7 @@ static void Sellers_reset_weights(Sellers *self)
|
|
185
185
|
|
186
186
|
typedef struct PairDistanceStruct {
|
187
187
|
char *pattern;
|
188
|
-
|
188
|
+
int pattern_len;
|
189
189
|
PairArray *pattern_pair_array;
|
190
190
|
} PairDistance;
|
191
191
|
|
@@ -694,7 +694,7 @@ static VALUE rb_Levenshtein_similar(VALUE self, VALUE strings)
|
|
694
694
|
*/
|
695
695
|
static VALUE rb_str_levenshtein_similar(VALUE self, VALUE strings)
|
696
696
|
{
|
697
|
-
VALUE amatch = rb_Levenshtein_new(
|
697
|
+
VALUE amatch = rb_Levenshtein_new(rb_cLevenshtein, self);
|
698
698
|
return rb_Levenshtein_similar(amatch, strings);
|
699
699
|
}
|
700
700
|
|
@@ -982,7 +982,7 @@ static VALUE rb_PairDistance_match(int argc, VALUE *argv, VALUE self)
|
|
982
982
|
*/
|
983
983
|
static VALUE rb_str_pair_distance_similar(VALUE self, VALUE strings)
|
984
984
|
{
|
985
|
-
VALUE amatch = rb_PairDistance_new(
|
985
|
+
VALUE amatch = rb_PairDistance_new(rb_cPairDistance, self);
|
986
986
|
return rb_PairDistance_match(1, &strings, amatch);
|
987
987
|
}
|
988
988
|
|
@@ -1133,7 +1133,7 @@ static VALUE rb_LongestSubsequence_similar(VALUE self, VALUE strings)
|
|
1133
1133
|
*/
|
1134
1134
|
static VALUE rb_str_longest_subsequence_similar(VALUE self, VALUE strings)
|
1135
1135
|
{
|
1136
|
-
VALUE amatch = rb_LongestSubsequence_new(
|
1136
|
+
VALUE amatch = rb_LongestSubsequence_new(rb_cLongestSubsequence, self);
|
1137
1137
|
return rb_LongestSubsequence_similar(amatch, strings);
|
1138
1138
|
}
|
1139
1139
|
|
@@ -1211,7 +1211,7 @@ static VALUE rb_LongestSubstring_similar(VALUE self, VALUE strings)
|
|
1211
1211
|
*/
|
1212
1212
|
static VALUE rb_str_longest_substring_similar(VALUE self, VALUE strings)
|
1213
1213
|
{
|
1214
|
-
VALUE amatch =
|
1214
|
+
VALUE amatch = rb_LongestSubstring_new(rb_cLongestSubstring, self);
|
1215
1215
|
return rb_LongestSubstring_similar(amatch, strings);
|
1216
1216
|
}
|
1217
1217
|
|
data/tests/test_hamming.rb
CHANGED
@@ -9,6 +9,7 @@ class TC_Hamming < Test::Unit::TestCase
|
|
9
9
|
def setup
|
10
10
|
@small = Hamming.new('test')
|
11
11
|
@empty = Hamming.new('')
|
12
|
+
@long = Hamming.new('A' * 160)
|
12
13
|
end
|
13
14
|
|
14
15
|
def test_empty
|
@@ -50,5 +51,9 @@ class TC_Hamming < Test::Unit::TestCase
|
|
50
51
|
assert_in_delta 0.1, @small.similar('aaatestbbb'), D
|
51
52
|
assert_in_delta 0.8, @small.pattern.hamming_similar('testa'), D
|
52
53
|
end
|
54
|
+
|
55
|
+
def test_long
|
56
|
+
assert_in_delta 1.0, @long.similar(@long.pattern), D
|
57
|
+
end
|
53
58
|
end
|
54
59
|
# vim: set et sw=2 ts=2:
|
data/tests/test_levenshtein.rb
CHANGED
@@ -7,8 +7,9 @@ class TC_Levenshtein < Test::Unit::TestCase
|
|
7
7
|
D = 0.000001
|
8
8
|
|
9
9
|
def setup
|
10
|
-
@empty
|
11
|
-
@simple
|
10
|
+
@empty = Levenshtein.new('')
|
11
|
+
@simple = Levenshtein.new('test')
|
12
|
+
@long = Levenshtein.new('A' * 160)
|
12
13
|
end
|
13
14
|
|
14
15
|
def test_match
|
@@ -70,5 +71,9 @@ class TC_Levenshtein < Test::Unit::TestCase
|
|
70
71
|
assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
|
71
72
|
assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
|
72
73
|
end
|
74
|
+
|
75
|
+
def test_long
|
76
|
+
assert_in_delta 1.0, @long.similar(@long.pattern), D
|
77
|
+
end
|
73
78
|
end
|
74
79
|
# vim: set et sw=2 ts=2:
|
data/tests/test_longest_subsequence.rb
CHANGED
@@ -9,6 +9,7 @@ class TC_LongestSubsequence < Test::Unit::TestCase
|
|
9
9
|
def setup
|
10
10
|
@small = LongestSubsequence.new('test')
|
11
11
|
@empty = LongestSubsequence.new('')
|
12
|
+
@long = LongestSubsequence.new('A' * 160)
|
12
13
|
end
|
13
14
|
|
14
15
|
def test_empty_subsequence
|
@@ -51,7 +52,11 @@ class TC_LongestSubsequence < Test::Unit::TestCase
|
|
51
52
|
assert_in_delta 0.75, @small.similar('tesa'), D
|
52
53
|
assert_in_delta 0.50, @small.similar('taex'), D
|
53
54
|
assert_in_delta 0.4, @small.similar('aaatestbbb'), D
|
54
|
-
assert_in_delta 0.75, @small.pattern.
|
55
|
+
assert_in_delta 0.75, @small.pattern.longest_subsequence_similar('est'), D
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_long
|
59
|
+
assert_in_delta 1.0, @long.similar(@long.pattern), D
|
55
60
|
end
|
56
61
|
end
|
57
62
|
# vim: set et sw=2 ts=2:
|
data/tests/test_longest_substring.rb
CHANGED
@@ -9,6 +9,7 @@ class TC_LongestSubstring < Test::Unit::TestCase
|
|
9
9
|
def setup
|
10
10
|
@small = LongestSubstring.new('test')
|
11
11
|
@empty = LongestSubstring.new('')
|
12
|
+
@long = LongestSubstring.new('A' * 160)
|
12
13
|
end
|
13
14
|
|
14
15
|
def test_empty_substring
|
@@ -51,7 +52,11 @@ class TC_LongestSubstring < Test::Unit::TestCase
|
|
51
52
|
assert_in_delta 0.75, @small.similar('tesa'), D
|
52
53
|
assert_in_delta 0.25, @small.similar('taex'), D
|
53
54
|
assert_in_delta 0.4, @small.similar('aaatestbbb'), D
|
54
|
-
assert_in_delta 0.75, @small.pattern.
|
55
|
+
assert_in_delta 0.75, @small.pattern.longest_substring_similar('est'), D
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_long
|
59
|
+
assert_in_delta 1.0, @long.similar(@long.pattern), D
|
55
60
|
end
|
56
61
|
end
|
57
62
|
# vim: set et sw=2 ts=2:
|
data/tests/test_pair_distance.rb
CHANGED
@@ -12,6 +12,7 @@ class TC_PairDistance < Test::Unit::TestCase
|
|
12
12
|
@france = PairDistance.new('republic of france')
|
13
13
|
@germany = PairDistance.new('federal republic of germany')
|
14
14
|
@csv = PairDistance.new('foo,bar,baz')
|
15
|
+
@long = PairDistance.new('A' * 160)
|
15
16
|
end
|
16
17
|
|
17
18
|
def test_empty
|
@@ -53,6 +54,7 @@ class TC_PairDistance < Test::Unit::TestCase
|
|
53
54
|
assert_in_delta 0.6, @single.match('aaa test bbb'), D
|
54
55
|
assert_in_delta 0.6, @single.match('test aaa bbb'), D
|
55
56
|
assert_in_delta 0.6, @single.match('bbb aaa test'), D
|
57
|
+
assert_in_delta 0.8571428, @single.pattern.pair_distance_similar('atest'), D
|
56
58
|
end
|
57
59
|
|
58
60
|
def test_csv
|
@@ -77,5 +79,9 @@ class TC_PairDistance < Test::Unit::TestCase
|
|
77
79
|
assert_in_delta 0.9, @csv.match('foo,baz,bar'), D
|
78
80
|
assert_in_delta 0.9, @csv.similar('foo,baz,bar'), D
|
79
81
|
end
|
82
|
+
|
83
|
+
def test_long
|
84
|
+
assert_in_delta 1.0, @long.similar(@long.pattern), D
|
85
|
+
end
|
80
86
|
end
|
81
87
|
# vim: set et sw=2 ts=2:
|
data/tests/test_sellers.rb
CHANGED
@@ -8,8 +8,9 @@ class TC_Sellers < TC_Levenshtein
|
|
8
8
|
D = 0.000001
|
9
9
|
|
10
10
|
def setup
|
11
|
-
@empty
|
12
|
-
@simple
|
11
|
+
@empty = Sellers.new('')
|
12
|
+
@simple = Sellers.new('test')
|
13
|
+
@long = Sellers.new('A' * 160)
|
13
14
|
end
|
14
15
|
|
15
16
|
def test_weights
|
@@ -85,10 +86,13 @@ class TC_Sellers < TC_Levenshtein
|
|
85
86
|
assert_in_delta 0.75, @simple.similar('tesa'), D
|
86
87
|
assert_in_delta 0.25, @simple.similar('taex'), D
|
87
88
|
assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
|
88
|
-
assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
|
89
89
|
@simple.insertion = 1
|
90
90
|
@simple.substitution = @simple.deletion = 2
|
91
91
|
assert_in_delta 0.875, @simple.similar('tst'), D
|
92
92
|
end
|
93
|
+
|
94
|
+
def test_long
|
95
|
+
assert_in_delta 1.0, @long.similar(@long.pattern), D
|
96
|
+
end
|
93
97
|
end
|
94
98
|
# vim: set et sw=2 ts=2:
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.8.
|
2
|
+
rubygems_version: 0.8.11
|
3
3
|
specification_version: 1
|
4
4
|
name: amatch
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.0
|
7
|
-
date: 2005-
|
6
|
+
version: 0.2.1
|
7
|
+
date: 2005-09-12 00:00:00 +02:00
|
8
8
|
summary: Approximate String Matching library
|
9
9
|
require_paths:
|
10
10
|
- ext
|
@@ -26,6 +26,8 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
26
26
|
version: 0.0.0
|
27
27
|
version:
|
28
28
|
platform: ruby
|
29
|
+
signing_key:
|
30
|
+
cert_chain:
|
29
31
|
authors:
|
30
32
|
- Florian Frank
|
31
33
|
files:
|
@@ -33,28 +35,24 @@ files:
|
|
33
35
|
- CHANGES
|
34
36
|
- GPL
|
35
37
|
- bin
|
36
|
-
-
|
38
|
+
- Rakefile
|
39
|
+
- VERSION
|
40
|
+
- install.rb
|
37
41
|
- ext
|
42
|
+
- tests
|
43
|
+
- bin/agrep.rb
|
38
44
|
- ext/MANIFEST
|
39
45
|
- ext/amatch.c
|
40
46
|
- ext/extconf.rb
|
41
|
-
- ext/amatch.bundle
|
42
|
-
- ext/tags
|
43
|
-
- ext/pair.h
|
44
47
|
- ext/pair.c
|
45
|
-
-
|
46
|
-
- VERSION
|
47
|
-
- install.rb
|
48
|
-
- config.save
|
49
|
-
- InstalledFiles
|
50
|
-
- tests
|
51
|
-
- tests/test_levenshtein.rb
|
52
|
-
- tests/test_hamming.rb
|
53
|
-
- tests/test_pair_distance.rb
|
48
|
+
- ext/pair.h
|
54
49
|
- tests/runner.rb
|
55
|
-
- tests/
|
50
|
+
- tests/test_hamming.rb
|
51
|
+
- tests/test_levenshtein.rb
|
56
52
|
- tests/test_longest_subsequence.rb
|
57
53
|
- tests/test_longest_substring.rb
|
54
|
+
- tests/test_pair_distance.rb
|
55
|
+
- tests/test_sellers.rb
|
58
56
|
test_files:
|
59
57
|
- tests/runner.rb
|
60
58
|
rdoc_options:
|
data/InstalledFiles
DELETED
data/config.save
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
bin-dir=$prefix/bin
|
2
|
-
site-ruby=$prefix/lib/ruby/site_ruby/1.8
|
3
|
-
prefix=/usr/local/stow/ruby
|
4
|
-
ruby-path=/usr/local/stow/ruby/bin/ruby
|
5
|
-
make-prog=make
|
6
|
-
rb-dir=$site-ruby
|
7
|
-
without-ext=no
|
8
|
-
ruby-prog=/usr/local/stow/ruby/bin/ruby
|
9
|
-
site-ruby-common=$prefix/lib/ruby/site_ruby
|
10
|
-
std-ruby=$prefix/lib/ruby/1.8
|
11
|
-
data-dir=$prefix/share
|
12
|
-
so-dir=$prefix/lib/ruby/site_ruby/1.8/i686-linux
|
data/ext/amatch.bundle
DELETED
Binary file
|
data/ext/tags
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
CAST2FLOAT amatch.c /^#define CAST2FLOAT(obj) \\$/
|
2
|
-
DEF_ALLOCATOR amatch.c /^#define DEF_ALLOCATOR(type) /
|
3
|
-
DEF_CONSTRUCTOR amatch.c /^#define DEF_CONSTRUCTOR(klass, type) /
|
4
|
-
DEF_ITERATE_STRINGS amatch.c /^#define DEF_ITERATE_STRINGS(type) /
|
5
|
-
DEF_PATTERN_ACCESSOR amatch.c /^#define DEF_PATTERN_ACCESSOR(type) /
|
6
|
-
DEF_RB_FREE amatch.c /^#define DEF_RB_FREE(klass, type) /
|
7
|
-
DEF_RB_READER amatch.c /^#define DEF_RB_READER(type, function, name, conver/
|
8
|
-
DEF_RB_WRITER amatch.c /^#define DEF_RB_WRITER(type, function, name, vtype,/
|
9
|
-
FLOAT2C amatch.c /^#define FLOAT2C(obj) RFLOAT(obj)->value$/
|
10
|
-
GET_STRUCT amatch.c /^#define GET_STRUCT(klass) \\$/
|
11
|
-
Init_amatch amatch.c /^void Init_amatch()$/
|
12
|
-
Levenshtein_search amatch.c /^static VALUE Levenshtein_search(Levenshtein *amatc/
|
13
|
-
PairDistance_match amatch.c /^static VALUE PairDistance_match($/
|
14
|
-
amatch_LongestSubstring amatch.c /^static VALUE amatch_LongestSubstring(General *amat/
|
15
|
-
hamming amatch.c /^static VALUE hamming(General *amatch, VALUE string/
|
16
|
-
longest_subsequence amatch.c /^static VALUE longest_subsequence(General *amatch, /
|
17
|
-
rb_Levenshtein_initialize amatch.c /^static VALUE rb_Levenshtein_initialize(VALUE self,/
|
18
|
-
rb_Levenshtein_search amatch.c /^static VALUE rb_Levenshtein_search(VALUE self, VAL/
|
19
|
-
rb_str_Levenshtein_match amatch.c /^static VALUE rb_str_Levenshtein_match(VALUE self, /
|
20
|
-
rb_str_Levenshtein_search amatch.c /^static VALUE rb_str_Levenshtein_search(VALUE self,/
|
21
|
-
rb_str_hamming amatch.c /^static VALUE rb_str_hamming(VALUE self, VALUE stri/
|
22
|
-
rb_str_longest_subsequence amatch.c /^static VALUE rb_str_longest_subsequence(VALUE self/
|
23
|
-
rb_str_longest_substring amatch.c /^static VALUE rb_str_longest_substring(VALUE self, /
|
24
|
-
rb_str_pair_distance amatch.c /^static VALUE rb_str_pair_distance(VALUE self, VALU/
|