amatch 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+ require 'test_levenshtein'
4
+
5
# Tests for Amatch::Sellers, the edit-distance matcher with configurable
# weights for substitution, insertion and deletion.
#
# The class derives from TC_Levenshtein (loaded from 'test_levenshtein'), so it
# presumably re-runs the inherited Levenshtein tests against the Sellers
# objects built in #setup -- confirm against that file.
class TC_Sellers < TC_Levenshtein
  include Amatch

  # Tolerance for every floating point comparison in this test case.
  D = 0.000001

  # Builds a matcher for the empty pattern and one for the pattern 'test'.
  def setup
    @empty  = Sellers.new('')
    @simple = Sellers.new('test')
  end

  # Checks the default weights, then makes one operation cheap and the other
  # two prohibitively expensive (1000) so that the expected distance is exactly
  # the cheap operation's weight. Done once with weight 1 and once with the
  # fractional weight 0.5, and finally verifies that reset_weights restores the
  # defaults.
  def test_weights
    assert_in_delta 1, @simple.substitution, D
    assert_in_delta 1, @simple.insertion,    D
    assert_in_delta 1, @simple.deletion,     D

    # Only insertion is affordable.
    @simple.insertion = 1
    @simple.substitution = @simple.deletion = 1000
    assert_in_delta 1, @simple.match('tst'),        D
    assert_in_delta 1, @simple.search('bbbtstccc'), D

    # Only deletion is affordable.
    @simple.deletion = 1
    @simple.substitution = @simple.insertion = 1000
    assert_in_delta 1, @simple.match('tedst'),        D
    assert_in_delta 1, @simple.search('bbbtedstccc'), D

    # Only substitution is affordable.
    @simple.substitution = 1
    @simple.deletion = @simple.insertion = 1000
    assert_in_delta 1, @simple.match('tast'),        D
    assert_in_delta 1, @simple.search('bbbtastccc'), D

    # Same three scenarios again with a fractional weight.
    @simple.insertion = 0.5
    @simple.substitution = @simple.deletion = 1000
    assert_in_delta 0.5, @simple.match('tst'),        D
    assert_in_delta 0.5, @simple.search('bbbtstccc'), D

    @simple.deletion = 0.5
    @simple.substitution = @simple.insertion = 1000
    assert_in_delta 0.5, @simple.match('tedst'),        D
    assert_in_delta 0.5, @simple.search('bbbtedstccc'), D

    @simple.substitution = 0.5
    @simple.deletion = @simple.insertion = 1000
    assert_in_delta 0.5, @simple.match('tast'),        D
    assert_in_delta 0.5, @simple.search('bbbtastccc'), D

    @simple.reset_weights
    assert_in_delta 1, @simple.substitution, D
    assert_in_delta 1, @simple.insertion,    D
    assert_in_delta 1, @simple.deletion,     D
  end

  # Assigning a non-numeric weight must raise TypeError at assignment time.
  def test_weight_exceptions
    assert_raises(TypeError) { @simple.substitution = :something }
    assert_raises(TypeError) { @simple.insertion = :something }
    assert_raises(TypeError) { @simple.deletion = :something }
  end

  # Similarity metric with default weights, followed by one check with
  # non-default weights.
  #
  # This was previously preceded by a second method that was also named
  # test_similar and held a strict subset of these assertions. Ruby keeps only
  # the last definition of a method name, so that earlier copy never ran and
  # has been removed.
  def test_similar
    assert_in_delta 1, @empty.similar(''),          D
    assert_in_delta 0, @empty.similar('not empty'), D

    assert_in_delta 0.0,  @simple.similar(''),           D
    assert_in_delta 1.0,  @simple.similar('test'),       D
    assert_in_delta 0.8,  @simple.similar('testa'),      D
    assert_in_delta 0.8,  @simple.similar('atest'),      D
    assert_in_delta 0.8,  @simple.similar('teast'),      D
    assert_in_delta 0.75, @simple.similar('est'),        D
    assert_in_delta 0.75, @simple.similar('tes'),        D
    assert_in_delta 0.75, @simple.similar('tst'),        D
    assert_in_delta 0.75, @simple.similar('best'),       D
    assert_in_delta 0.75, @simple.similar('tost'),       D
    assert_in_delta 0.75, @simple.similar('tesa'),       D
    assert_in_delta 0.25, @simple.similar('taex'),       D
    assert_in_delta 0.4,  @simple.similar('aaatestbbb'), D
    assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D

    # Non-default weights change the similarity value.
    @simple.insertion = 1
    @simple.substitution = @simple.deletion = 2
    assert_in_delta 0.875, @simple.similar('tst'), D
  end
end
94
+ # vim: set et sw=2 ts=2:
metadata CHANGED
@@ -3,15 +3,17 @@ rubygems_version: 0.8.10
3
3
  specification_version: 1
4
4
  name: amatch
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.5
7
- date: 2005-05-26
6
+ version: 0.2.0
7
+ date: 2005-06-01
8
8
  summary: Approximate String Matching library
9
9
  require_paths:
10
- - lib
10
+ - ext
11
11
  email: flori@ping.de
12
12
  homepage: http://amatch.rubyforge.org
13
13
  rubyforge_project: amatch
14
- description: Amatch is a library for approximate string matching and searching using a dynamic programming algorithm to compute the Levenstein distance between strings.
14
+ description: "Amatch is a library for approximate string matching and searching in strings.
15
+ Several algorithms can be used to do this, and it's also possible to compute a
16
+ similarity metric number between 0.0 and 1.0 for two given strings."
15
17
  autorequire: amatch
16
18
  default_executable: agrep.rb
17
19
  bindir: bin
@@ -36,15 +38,31 @@ files:
36
38
  - ext/MANIFEST
37
39
  - ext/amatch.c
38
40
  - ext/extconf.rb
41
+ - ext/amatch.bundle
42
+ - ext/tags
43
+ - ext/pair.h
44
+ - ext/pair.c
39
45
  - Rakefile
40
46
  - VERSION
41
- - amatch.txt.en
42
47
  - install.rb
48
+ - config.save
49
+ - InstalledFiles
43
50
  - tests
44
- - tests/test.rb
51
+ - tests/test_levenshtein.rb
52
+ - tests/test_hamming.rb
53
+ - tests/test_pair_distance.rb
54
+ - tests/runner.rb
55
+ - tests/test_sellers.rb
56
+ - tests/test_longest_subsequence.rb
57
+ - tests/test_longest_substring.rb
45
58
  test_files:
46
- - tests/test.rb
47
- rdoc_options: []
59
+ - tests/runner.rb
60
+ rdoc_options:
61
+ - "--title"
62
+ - "Amatch -- Approximate Matching"
63
+ - "--main"
64
+ - Amatch
65
+ - "--line-numbers"
48
66
  extra_rdoc_files: []
49
67
  executables:
50
68
  - agrep.rb
@@ -1,117 +0,0 @@
1
- AMatch
2
-
3
- Approximate Matching/Searching/Comparing
4
-
5
- SYNOPSIS
6
-
7
- require 'amatch'
8
-
9
- m = Amatch.new("pattern")
10
-
11
- p m.match("pattren")
12
- p m.match(["pattren","parent"])
13
- p m.matchr("pattren")
14
- p m.compare("pattren")
15
- p m.comparer("pattren")
16
- p m.compare("pattn")
17
- p m.comparer("pattn")
18
- p m.search("abcpattrendef")
19
- p m.searchr("abcpattrendef")
20
-
21
- DESCRIPTION
22
-
23
- This class enables your programs to do approximate matching, searching and
24
- comparing of strings. It uses an algorithm that calculates the Levenshtein
25
- distance between those strings to implement those features.
26
-
27
- The Levenshtein edit distance is defined as the minimal cost required to
28
- transform one string into another by using three elementary operations:
29
- deletion, insertion and substitution of a character. To transform "water" into
30
- "wine", for instance, you have to substitute ?a -> ?i: "witer", ?t -> ?n:
31
- "winer" and delete ?r: "wine". The edit distance between "water" and "wine" is
32
- 3, because you have to apply three operations. The edit distance between
33
- "wine" and "wine" is 0, of course: no operation is necessary for the
34
- transformation -- they're already the same string. It's easy to see that more
35
- similar strings have smaller edit distances than strings that differ a lot.
36
-
37
- You can also use different weights for every operation to prefer special
38
- operations over others. There are three different kinds of match methods
39
- defined in this class: "match" computes the Levenshtein distance between a
40
- pattern and some strings, "search" searches in some text for a special pattern
41
- returning a minimal distance, "compare" calculates a value that can be used to
42
- define a partial order between strings in relation to a given pattern. It's
43
- also possible to compute a relative distance. This floating point value is
44
- computed as absolute distance / length of search pattern.
45
-
46
- CONSTRUCTOR
47
-
48
- - Amatch#new(pattern)
49
-
50
- constructs an Amatch object and initializes it with 'pattern'. If no 'pattern'
51
- is given it has to be set with Amatch#pattern before matching.
52
-
53
- METHODS
54
-
55
- - Amatch#pattern pattern string to match against
56
-
57
- - Amatch#subw weight of one substitution (type Fixnum)
58
-
59
- - Amatch#delw weight of one deletion (type Fixnum)
60
-
61
- - Amatch#insw weight of one insertion (type Fixnum)
62
-
63
- - Amatch#resetw resets all weights to their default values (=1).
64
-
65
- The following methods require the parameter 'strings'. This parameter can be
66
- of type String or Array of Strings. The method executes the matching operation
67
- and returns a number if a string was given. If an array of strings was given
68
- it returns an array of numbers.
69
-
70
- - Amatch#match(strings)
71
-
72
- calculates the absolute edit distance(s) between 'pattern' and 'strings' =
73
- the Levenshtein distance in char operations. See also Amatch#pattern.
74
-
75
- - Amatch#matchr(strings)
76
-
77
- calculates the relative edit distance as float. This value is defined as the
78
- edit distance divided by the length of 'pattern'. See also Amatch#pattern.
79
-
80
- - Amatch#search(strings)
81
-
82
- searches 'pattern' in strings and returns the edit distance by greedy
83
- trimming prefixes or postfixes of the match.
84
-
85
- - Amatch#searchr(strings)
86
-
87
- does the same as Amatch#search but divides the edit distance by the length
88
- of 'pattern' and returns the value as float.
89
-
90
- - Amatch#compare(strings)
91
-
92
- calculates the same absolute value as Amatch#match. The sign of the result
93
- value is negative if the strings are shorter than 'pattern' or positive
94
- else.
95
-
96
- - Amatch#comparer(strings)
97
-
98
- calculates the same absolute value as Amatch#matchr. The sign of the
99
- result value is negative if the strings are shorter than 'pattern' or
100
- positive else.
101
-
102
- EXAMPLES
103
-
104
- An agrep utility will be installed that demonstrates the usage of this
105
- library.
106
-
107
- AUTHOR
108
-
109
- Florian Frank <flori@ping.de>
110
-
111
- COPYRIGHT
112
-
113
- Copyright (c) 2002 Florian Frank <flori@ping.de>
114
-
115
- This is free software; you can redistribute it and/or modify it under the
116
- terms of the GNU General Public License Version 2 as published by the Free
117
- Software Foundation: http://www.gnu.org/copyleft/gpl.html
@@ -1,94 +0,0 @@
1
- require 'test/unit'
2
- require 'amatch'
3
-
4
# Tests for the single-class Amatch API (match/search/compare plus their
# relative "r" variants and the subw/insw/delw weight accessors).
#
# Every check uses assert_equal(expected, actual) rather than
# assert(actual == expected), so a failure reports both values instead of
# just "false is not true".
class TC_AmatchTest < Test::Unit::TestCase
  # Fresh matcher for the pattern 'test' before every test.
  def setup
    @matcher = Amatch.new('test')
  end

  # Absolute (match) and relative (matchr) edit distance for single strings.
  def test_match
    assert_equal 4,    @matcher.match('')
    assert_equal 0,    @matcher.match('test')
    assert_equal 1,    @matcher.match('testa')
    assert_equal 1,    @matcher.match('atest')
    assert_equal 1,    @matcher.match('teast')
    assert_equal 1,    @matcher.match('est')
    assert_equal 1,    @matcher.match('tes')
    assert_equal 1,    @matcher.match('tst')
    assert_equal 1,    @matcher.match('best')
    assert_equal 1,    @matcher.match('tost')
    assert_equal 1,    @matcher.match('tesa')
    assert_equal 3,    @matcher.match('taex')
    assert_equal 1.0,  @matcher.matchr('')
    assert_equal 0.25, @matcher.matchr('tesa')
    assert_equal 6,    @matcher.match('aaatestbbb')
  end

  # Best distance of the pattern anywhere inside a longer text.
  def test_search
    assert_equal 4,    @matcher.search('')
    assert_equal 1.0,  @matcher.searchr('')
    assert_equal 0,    @matcher.search('aaatestbbb')
    assert_equal 3,    @matcher.search('aaataexbbb')
    assert_equal 0.75, @matcher.searchr('aaataexbbb')
    assert_equal 4,    @matcher.search('aaaaaaaaa')
    assert_equal 1.0,  @matcher.searchr('aaaaaaaaa')
  end

  # compare/comparer return the distance with a negative sign when the
  # argument is shorter than the pattern.
  def test_compare
    assert_equal(-4,   @matcher.compare(''))
    assert_equal(-1.0, @matcher.comparer(''))
    assert_equal 3,    @matcher.compare('taex')
    assert_equal 0.25, @matcher.comparer('tesa')
    assert_equal 6,    @matcher.compare('aaatestbbb')
    assert_equal 0,    @matcher.compare('test')
    assert_equal(-2,   @matcher.compare('tex'))
    assert_equal(-0.5, @matcher.comparer('tsa'))
    assert_equal 4,    @matcher.compare('wxyz')
    assert_equal 1.0,  @matcher.comparer('wxyz')
    assert_raises(TypeError) { @matcher.match(:foo) }
  end

  # Passing an array of strings yields an array of results.
  def test_array_result
    assert_equal [],        @matcher.match([])
    assert_equal [2, 0],    @matcher.match(%w[tets test])
    assert_equal [0.5, 0],  @matcher.matchr(%w[tets test])
    assert_equal [2, 0],    @matcher.compare(%w[tets test])
    assert_equal [0.5, 0],  @matcher.comparer(%w[tets test])
    assert_equal [1, 0],    @matcher.search(%w[tetsaaa testaaa])
    assert_equal [0.25, 0], @matcher.searchr(%w[tetsaaa testaaa])
    assert_raises(TypeError) { @matcher.match([:foo, 'bar']) }
  end

  # Weight accessors: defaults, effect on the distance, resetw, and the
  # TypeError raised by a later match when a weight is not numeric.
  def test_weights
    assert_equal 1, @matcher.subw
    assert_equal 1, @matcher.insw
    assert_equal 1, @matcher.delw

    @matcher.subw = 2
    assert_equal 2, @matcher.subw
    assert_equal 2, @matcher.match('tast')
    @matcher.subw = 1
    assert_equal 1, @matcher.match('tast')

    @matcher.delw = 2
    assert_equal 2, @matcher.delw
    assert_equal 2, @matcher.match('teist')

    @matcher.insw = 2
    assert_equal 2, @matcher.insw
    assert_equal 2, @matcher.match('tst')

    @matcher.resetw
    assert_equal 1, @matcher.subw
    assert_equal 1, @matcher.insw
    assert_equal 1, @matcher.delw

    # In this API a bad weight is accepted on assignment and only rejected
    # when it is used by the next match.
    @matcher.subw = :something
    assert_raises(TypeError) { @matcher.match('anything') }
    @matcher.subw = 1
    @matcher.insw = :something
    assert_raises(TypeError) { @matcher.match('anything') }
    @matcher.insw = 1
    @matcher.delw = :something
    assert_raises(TypeError) { @matcher.match('anything') }
  end
end
94
- # vim: set noet sw=4 ts=4: