amatch 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,94 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+ require 'test_levenshtein'
4
+
5
+ class TC_Sellers < TC_Levenshtein
6
+ include Amatch
7
+
8
+ D = 0.000001
9
+
10
+ def setup
11
+ @empty = Sellers.new('')
12
+ @simple = Sellers.new('test')
13
+ end
14
+
15
+ def test_weights
16
+ assert_in_delta 1, @simple.substitution, D
17
+ assert_in_delta 1, @simple.insertion, D
18
+ assert_in_delta 1, @simple.deletion, D
19
+ @simple.insertion = 1
20
+ @simple.substitution = @simple.deletion = 1000
21
+ assert_in_delta 1, @simple.match('tst'), D
22
+ assert_in_delta 1, @simple.search('bbbtstccc'), D
23
+ @simple.deletion = 1
24
+ @simple.substitution = @simple.insertion = 1000
25
+ assert_in_delta 1, @simple.match('tedst'), D
26
+ assert_in_delta 1, @simple.search('bbbtedstccc'), D
27
+ @simple.substitution = 1
28
+ @simple.deletion = @simple.insertion = 1000
29
+ assert_in_delta 1, @simple.match('tast'), D
30
+ assert_in_delta 1, @simple.search('bbbtastccc'), D
31
+ @simple.insertion = 0.5
32
+ @simple.substitution = @simple.deletion = 1000
33
+ assert_in_delta 0.5, @simple.match('tst'), D
34
+ assert_in_delta 0.5, @simple.search('bbbtstccc'), D
35
+ @simple.deletion = 0.5
36
+ @simple.substitution = @simple.insertion = 1000
37
+ assert_in_delta 0.5, @simple.match('tedst'), D
38
+ assert_in_delta 0.5, @simple.search('bbbtedstccc'), D
39
+ @simple.substitution = 0.5
40
+ @simple.deletion = @simple.insertion = 1000
41
+ assert_in_delta 0.5, @simple.match('tast'), D
42
+ assert_in_delta 0.5, @simple.search('bbbtastccc'), D
43
+ @simple.reset_weights
44
+ assert_in_delta 1, @simple.substitution, D
45
+ assert_in_delta 1, @simple.insertion, D
46
+ assert_in_delta 1, @simple.deletion, D
47
+ end
48
+
49
+ def test_weight_exceptions
50
+ assert_raises(TypeError) { @simple.substitution = :something }
51
+ assert_raises(TypeError) { @simple.insertion = :something }
52
+ assert_raises(TypeError) { @simple.deletion = :something }
53
+ end
54
+
55
+ def test_similar
56
+ assert_in_delta 0.0, @simple.similar(''), D
57
+ assert_in_delta 1.0, @simple.similar('test'), D
58
+ assert_in_delta 0.8, @simple.similar('testa'), D
59
+ assert_in_delta 0.8, @simple.similar('atest'), D
60
+ assert_in_delta 0.8, @simple.similar('teast'), D
61
+ assert_in_delta 0.75, @simple.similar('est'), D
62
+ assert_in_delta 0.75, @simple.similar('tes'), D
63
+ assert_in_delta 0.75, @simple.similar('tst'), D
64
+ assert_in_delta 0.75, @simple.similar('best'), D
65
+ assert_in_delta 0.75, @simple.similar('tost'), D
66
+ assert_in_delta 0.75, @simple.similar('tesa'), D
67
+ assert_in_delta 0.25, @simple.similar('taex'), D
68
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
69
+ assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
70
+ end
71
+
72
+ def test_similar
73
+ assert_in_delta 1, @empty.similar(''), D
74
+ assert_in_delta 0, @empty.similar('not empty'), D
75
+ assert_in_delta 0.0, @simple.similar(''), D
76
+ assert_in_delta 1.0, @simple.similar('test'), D
77
+ assert_in_delta 0.8, @simple.similar('testa'), D
78
+ assert_in_delta 0.8, @simple.similar('atest'), D
79
+ assert_in_delta 0.8, @simple.similar('teast'), D
80
+ assert_in_delta 0.75, @simple.similar('est'), D
81
+ assert_in_delta 0.75, @simple.similar('tes'), D
82
+ assert_in_delta 0.75, @simple.similar('tst'), D
83
+ assert_in_delta 0.75, @simple.similar('best'), D
84
+ assert_in_delta 0.75, @simple.similar('tost'), D
85
+ assert_in_delta 0.75, @simple.similar('tesa'), D
86
+ assert_in_delta 0.25, @simple.similar('taex'), D
87
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
88
+ assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
89
+ @simple.insertion = 1
90
+ @simple.substitution = @simple.deletion = 2
91
+ assert_in_delta 0.875, @simple.similar('tst'), D
92
+ end
93
+ end
94
+ # vim: set et sw=2 ts=2:
metadata CHANGED
@@ -3,15 +3,17 @@ rubygems_version: 0.8.10
3
3
  specification_version: 1
4
4
  name: amatch
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.5
7
- date: 2005-05-26
6
+ version: 0.2.0
7
+ date: 2005-06-01
8
8
  summary: Approximate String Matching library
9
9
  require_paths:
10
- - lib
10
+ - ext
11
11
  email: flori@ping.de
12
12
  homepage: http://amatch.rubyforge.org
13
13
  rubyforge_project: amatch
14
- description: Amatch is a library for approximate string matching and searching using a dynamic programming algorithm to compute the Levenstein distance between strings.
14
+ description: "Amatch is a library for approximate string matching and searching in strings.
15
+ Several algorithms can be used to do this, and it's also possible to compute a
16
+ similarity metric number between 0.0 and 1.0 for two given strings."
15
17
  autorequire: amatch
16
18
  default_executable: agrep.rb
17
19
  bindir: bin
@@ -36,15 +38,31 @@ files:
36
38
  - ext/MANIFEST
37
39
  - ext/amatch.c
38
40
  - ext/extconf.rb
41
+ - ext/amatch.bundle
42
+ - ext/tags
43
+ - ext/pair.h
44
+ - ext/pair.c
39
45
  - Rakefile
40
46
  - VERSION
41
- - amatch.txt.en
42
47
  - install.rb
48
+ - config.save
49
+ - InstalledFiles
43
50
  - tests
44
- - tests/test.rb
51
+ - tests/test_levenshtein.rb
52
+ - tests/test_hamming.rb
53
+ - tests/test_pair_distance.rb
54
+ - tests/runner.rb
55
+ - tests/test_sellers.rb
56
+ - tests/test_longest_subsequence.rb
57
+ - tests/test_longest_substring.rb
45
58
  test_files:
46
- - tests/test.rb
47
- rdoc_options: []
59
+ - tests/runner.rb
60
+ rdoc_options:
61
+ - "--title"
62
+ - "Amatch -- Approximate Matching"
63
+ - "--main"
64
+ - Amatch
65
+ - "--line-numbers"
48
66
  extra_rdoc_files: []
49
67
  executables:
50
68
  - agrep.rb
@@ -1,117 +0,0 @@
1
- AMatch
2
-
3
- Approximate Matching/Searching/Comparing
4
-
5
- SYNOPSIS
6
-
7
- require 'amatch'
8
-
9
- m = Amatch.new("pattern")
10
-
11
- p m.match("pattren")
12
- p m.match(["pattren","parent"])
13
- p m.matchr("pattren")
14
- p m.compare("pattren")
15
- p m.comparer("pattren")
16
- p m.compare("pattn")
17
- p m.comparer("pattn")
18
- p m.search("abcpattrendef")
19
- p m.searchr("abcpattrendef")
20
-
21
- DESCRIPTION
22
-
23
- This class enables your programs to do approximate matching, searching and
24
- comparing of strings. It uses an algorithm that calculates the Levenshtein
25
- distance between those strings to implement those features.
26
-
27
- The Levenshtein edit distance is defined as the minimal cost involved to
28
- transform one string into another by using three elementary operations:
29
- deletion, insertion and substitution of a character. To transform "water" into
30
- "wine", for instance, you have to substitute ?a -> ?i: "witer", ?t -> ?n:
31
- "winer" and delete ?r: "wine". The edit distance between "water" and "wine" is
32
- 3, because you have to apply three operations. The edit distance between
33
- "wine" and "wine" is 0, of course: no operation is necessary for the
34
- transformation -- they're already the same string. It's easy to see that more
35
- similar strings have smaller edit distances than strings that differ a lot.
36
-
37
- You can also use different weights for every operation to prefer special
38
- operations over others. There are three different kinds of match methods
39
- defined in this class: "match" computes the Levenshtein distance between a
40
- pattern and some strings, "search" searches in some text for a special pattern
41
- returning a minimal distance, "compare" calculates a value that can be used to
42
- define a partial order between strings in relation to a given pattern. It's
43
- also possible to compute a relative distance. This floating point value is
44
- computed as absolute distance / length of search pattern.
45
-
46
- CONSTRUCTOR
47
-
48
- - Amatch#new(pattern)
49
-
50
- constructs an Amatch object and initializes it with 'pattern'. If no 'pattern'
51
- is given it has to be set with Amatch#pattern before matching.
52
-
53
- METHODS
54
-
55
- - Amatch#pattern pattern string to match against
56
-
57
- - Amatch#subw weight of one substitution (type Fixnum)
58
-
59
- - Amatch#delw weight of one deletion (type Fixnum)
60
-
61
- - Amatch#insw weight of one insertion (type Fixnum)
62
-
63
- - Amatch#resetw resets all weights to their default values (=1).
64
-
65
- The following methods require the parameter 'strings'. This parameter can be
66
- of type String or Array of Strings. The method executes the matching operation
67
- and returns a number if a string was given. If an array of strings was given
68
- it returns an array of numbers.
69
-
70
- - Amatch#match(strings)
71
-
72
- calculates the absolute edit distance(s) between 'pattern' and 'strings' =
73
- the Levenshtein distance in char operations. See also Amatch#pattern.
74
-
75
- - Amatch#matchr(strings)
76
-
77
- calculates the relative edit distance as float. This value is defined as the
78
- edit distance divided by the length of 'pattern'. See also Amatch#pattern.
79
-
80
- - Amatch#search(strings)
81
-
82
- searches 'pattern' in strings and returns the edit distance by greedy
83
- trimming prefixes or postfixes of the match.
84
-
85
- - Amatch#searchr(strings)
86
-
87
- does the same as Amatch#search but divides the edit distance by the length
88
- of 'pattern' and returns the value as float.
89
-
90
- - Amatch#compare(strings)
91
-
92
- calculates the same absolute value as Amatch#match. The sign of the result
93
- value is negative if the strings are shorter than 'pattern' or positive
94
- else.
95
-
96
- - Amatch#comparer(strings)
97
-
98
- calculates the same absolute value as Amatch#matchr. The sign of the
99
- result value is negative if the strings are shorter than 'pattern' or
100
- positive else.
101
-
102
- EXAMPLES
103
-
104
- An agrep utility will be installed that demonstrates the usage of this
105
- library.
106
-
107
- AUTHOR
108
-
109
- Florian Frank <flori@ping.de>
110
-
111
- COPYRIGHT
112
-
113
- Copyright (c) 2002 Florian Frank <flori@ping.de>
114
-
115
- This is free software; you can redistribute it and/or modify it under the
116
- terms of the GNU General Public License Version 2 as published by the Free
117
- Software Foundation: http://www.gnu.org/copyleft/gpl.html
@@ -1,94 +0,0 @@
1
- require 'test/unit'
2
- require 'amatch'
3
-
4
- class TC_AmatchTest < Test::Unit::TestCase
5
-
6
- def setup
7
- @matcher = Amatch.new('test')
8
- end
9
-
10
- def test_match
11
- assert(@matcher.match('') == 4)
12
- assert(@matcher.match('test') == 0)
13
- assert(@matcher.match('test') == 0)
14
- assert(@matcher.match('testa') == 1)
15
- assert(@matcher.match('atest') == 1)
16
- assert(@matcher.match('teast') == 1)
17
- assert(@matcher.match('est') == 1)
18
- assert(@matcher.match('tes') == 1)
19
- assert(@matcher.match('tst') == 1)
20
- assert(@matcher.match('best') == 1)
21
- assert(@matcher.match('tost') == 1)
22
- assert(@matcher.match('tesa') == 1)
23
- assert(@matcher.match('taex') == 3)
24
- assert(@matcher.matchr('') == 1.0)
25
- assert(@matcher.matchr('tesa') == 0.25)
26
- assert(@matcher.match('aaatestbbb') == 6)
27
- end
28
-
29
- def test_search
30
- assert(@matcher.search('') == 4)
31
- assert(@matcher.searchr('') == 1.0)
32
- assert(@matcher.search('aaatestbbb') == 0)
33
- assert(@matcher.search('aaataexbbb') == 3)
34
- assert(@matcher.searchr('aaataexbbb') == 0.75)
35
- assert(@matcher.search('aaaaaaaaa') == 4)
36
- assert(@matcher.searchr('aaaaaaaaa') == 1.0)
37
- end
38
-
39
- def test_compare
40
- assert(@matcher.compare('') == -4)
41
- assert(@matcher.comparer('') == -1.0)
42
- assert(@matcher.compare('taex') == 3)
43
- assert(@matcher.comparer('tesa') == 0.25)
44
- assert(@matcher.compare('aaatestbbb') == 6)
45
- assert(@matcher.compare('test') == 0)
46
- assert(@matcher.compare('tex') == -2)
47
- assert(@matcher.comparer('tsa') == -0.5)
48
- assert(@matcher.compare('wxyz') == 4)
49
- assert(@matcher.comparer('wxyz') == 1.0)
50
- assert_raises(TypeError) { @matcher.match(:foo) }
51
- end
52
-
53
- def test_array_result
54
- assert(@matcher.match([]) == []);
55
- assert(@matcher.match(["tets", "test"]) == [2, 0]);
56
- assert(@matcher.matchr(["tets", "test"]) == [0.5, 0]);
57
- assert(@matcher.compare(["tets", "test"]) == [2, 0]);
58
- assert(@matcher.comparer(["tets", "test"]) == [0.5, 0]);
59
- assert(@matcher.search(["tetsaaa", "testaaa"]) == [1, 0]);
60
- assert(@matcher.searchr(["tetsaaa", "testaaa"]) == [0.25, 0]);
61
- assert_raises(TypeError) { @matcher.match([:foo, "bar"]) }
62
- end
63
-
64
- def test_weights
65
- assert(@matcher.subw == 1)
66
- assert(@matcher.insw == 1)
67
- assert(@matcher.delw == 1)
68
- @matcher.subw = 2
69
- assert(@matcher.subw == 2)
70
- assert(@matcher.match('tast') == 2)
71
- @matcher.subw = 1
72
- assert(@matcher.match('tast') == 1)
73
- @matcher.delw = 2
74
- assert(@matcher.delw == 2)
75
- assert(@matcher.match('teist') == 2)
76
- @matcher.insw = 2
77
- assert(@matcher.insw == 2)
78
- assert(@matcher.match('tst') == 2)
79
- @matcher.resetw
80
- assert(@matcher.subw == 1)
81
- assert(@matcher.insw == 1)
82
- assert(@matcher.delw == 1)
83
- @matcher.subw = :something
84
- assert_raises(TypeError) { @matcher.match('anything') }
85
- @matcher.subw = 1
86
- @matcher.insw = :something
87
- assert_raises(TypeError) { @matcher.match('anything') }
88
- @matcher.insw = 1
89
- @matcher.delw = :something
90
- assert_raises(TypeError) { @matcher.match('anything') }
91
- end
92
-
93
- end
94
- # vim: set noet sw=4 ts=4: