amatch 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
1
+ module Amatch
2
+ # Amatch version
3
+ VERSION = '0.2.4'
4
+ VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
5
+ VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
+ VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
7
+ VERSION_BUILD = VERSION_ARRAY[2] # :nodoc:
8
+ end
@@ -1,7 +1,7 @@
1
1
  require 'test/unit'
2
2
  require 'amatch'
3
3
 
4
- class TC_Hamming < Test::Unit::TestCase
4
+ class TestHamming < Test::Unit::TestCase
5
5
  include Amatch
6
6
 
7
7
  D = 0.000001
@@ -56,4 +56,3 @@ class TC_Hamming < Test::Unit::TestCase
56
56
  assert_in_delta 1.0, @long.similar(@long.pattern), D
57
57
  end
58
58
  end
59
- # vim: set et sw=2 ts=2:
@@ -0,0 +1,29 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + "/../ext/amatch"
3
+
4
+ class TestJaro < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.0005
8
+
9
+ def setup
10
+ @martha = Jaro.new('Martha')
11
+ @dwayne = Jaro.new('dwayne')
12
+ @dixon = Jaro.new('DIXON')
13
+ @one = Jaro.new('one')
14
+ end
15
+
16
+ def test_case
17
+ @martha.ignore_case = true
18
+ assert_in_delta 0.944, @martha.match('MARHTA'), D
19
+ @martha.ignore_case = false
20
+ assert_in_delta 0.444, @martha.match('MARHTA'), D
21
+ end
22
+
23
+ def test_match
24
+ assert_in_delta 0.944, @martha.match('MARHTA'), D
25
+ assert_in_delta 0.822, @dwayne.match('DUANE'), D
26
+ assert_in_delta 0.767, @dixon.match('DICKSONX'), D
27
+ assert_in_delta 0.667, @one.match('orange'), D
28
+ end
29
+ end
@@ -0,0 +1,38 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TestJaroWinkler < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ D = 0.0005
8
+
9
+ def setup
10
+ @martha = JaroWinkler.new('Martha')
11
+ @dwayne = JaroWinkler.new('dwayne')
12
+ @dixon = JaroWinkler.new('DIXON')
13
+ @one = JaroWinkler.new("one")
14
+ end
15
+
16
+ def test_case
17
+ @martha.ignore_case = true
18
+ assert_in_delta 0.961, @martha.match('MARHTA'), D
19
+ @martha.ignore_case = false
20
+ assert_in_delta 0.500, @martha.match('MARHTA'), D
21
+ end
22
+
23
+ def test_match
24
+ assert_in_delta 0.961, @martha.match('MARHTA'), D
25
+ assert_in_delta 0.840, @dwayne.match('DUANE'), D
26
+ assert_in_delta 0.813, @dixon.match('DICKSONX'), D
27
+ assert_in_delta 0, @one.match('two'), D
28
+ assert_in_delta 0.700, @one.match('orange'), D
29
+ end
30
+
31
+ def test_scaling_factor
32
+ assert_in_delta 0.1, @martha.scaling_factor, 0.0000001
33
+ @martha.scaling_factor = 0.2
34
+ assert_in_delta 0.978, @martha.match('MARHTA'), D
35
+ @martha.scaling_factor = 0.5 # this is far too high
36
+ assert_in_delta 1.028, @martha.match('MARHTA'), D
37
+ end
38
+ end
@@ -1,23 +1,16 @@
1
1
  require 'test/unit'
2
2
  require 'amatch'
3
3
 
4
- class TC_Levenshtein < Test::Unit::TestCase
4
+ class TestLevenshtein < Test::Unit::TestCase
5
5
  include Amatch
6
6
 
7
- D = 0.000001
8
-
9
7
  def setup
8
+ @d = 0.000001
10
9
  @empty = Levenshtein.new('')
11
10
  @simple = Levenshtein.new('test')
12
11
  @long = Levenshtein.new('A' * 160)
13
12
  end
14
13
 
15
- def test_long
16
- a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
17
- b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
18
- p a.levenshtein_similar b
19
- end
20
-
21
14
  def test_match
22
15
  assert_equal 4, @simple.match('')
23
16
  assert_equal 0, @simple.match('test')
@@ -60,26 +53,31 @@ class TC_Levenshtein < Test::Unit::TestCase
60
53
  end
61
54
 
62
55
  def test_similar
63
- assert_in_delta 1, @empty.similar(''), D
64
- assert_in_delta 0, @empty.similar('not empty'), D
65
- assert_in_delta 0.0, @simple.similar(''), D
66
- assert_in_delta 1.0, @simple.similar('test'), D
67
- assert_in_delta 0.8, @simple.similar('testa'), D
68
- assert_in_delta 0.8, @simple.similar('atest'), D
69
- assert_in_delta 0.8, @simple.similar('teast'), D
70
- assert_in_delta 0.75, @simple.similar('est'), D
71
- assert_in_delta 0.75, @simple.similar('tes'), D
72
- assert_in_delta 0.75, @simple.similar('tst'), D
73
- assert_in_delta 0.75, @simple.similar('best'), D
74
- assert_in_delta 0.75, @simple.similar('tost'), D
75
- assert_in_delta 0.75, @simple.similar('tesa'), D
76
- assert_in_delta 0.25, @simple.similar('taex'), D
77
- assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
78
- assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
56
+ assert_in_delta 1, @empty.similar(''), @d
57
+ assert_in_delta 0, @empty.similar('not empty'), @d
58
+ assert_in_delta 0.0, @simple.similar(''), @d
59
+ assert_in_delta 1.0, @simple.similar('test'), @d
60
+ assert_in_delta 0.8, @simple.similar('testa'), @d
61
+ assert_in_delta 0.8, @simple.similar('atest'), @d
62
+ assert_in_delta 0.8, @simple.similar('teast'), @d
63
+ assert_in_delta 0.75, @simple.similar('est'), @d
64
+ assert_in_delta 0.75, @simple.similar('tes'), @d
65
+ assert_in_delta 0.75, @simple.similar('tst'), @d
66
+ assert_in_delta 0.75, @simple.similar('best'), @d
67
+ assert_in_delta 0.75, @simple.similar('tost'), @d
68
+ assert_in_delta 0.75, @simple.similar('tesa'), @d
69
+ assert_in_delta 0.25, @simple.similar('taex'), @d
70
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
71
+ assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), @d
79
72
  end
80
73
 
81
74
  def test_long
82
- assert_in_delta 1.0, @long.similar(@long.pattern), D
75
+ assert_in_delta 1.0, @long.similar(@long.pattern), @d
76
+ end
77
+
78
+ def test_long2
79
+ a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
80
+ b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
81
+ assert a.levenshtein_similar(b)
83
82
  end
84
83
  end
85
- # vim: set et sw=2 ts=2:
@@ -1,7 +1,7 @@
1
1
  require 'test/unit'
2
2
  require 'amatch'
3
3
 
4
- class TC_LongestSubsequence < Test::Unit::TestCase
4
+ class TestLongestSubsequence < Test::Unit::TestCase
5
5
  include Amatch
6
6
 
7
7
  D = 0.000001
@@ -59,4 +59,3 @@ class TC_LongestSubsequence < Test::Unit::TestCase
59
59
  assert_in_delta 1.0, @long.similar(@long.pattern), D
60
60
  end
61
61
  end
62
- # vim: set et sw=2 ts=2:
@@ -1,7 +1,7 @@
1
1
  require 'test/unit'
2
2
  require 'amatch'
3
3
 
4
- class TC_LongestSubstring < Test::Unit::TestCase
4
+ class TestLongestSubstring < Test::Unit::TestCase
5
5
  include Amatch
6
6
 
7
7
  D = 0.000001
@@ -59,4 +59,3 @@ class TC_LongestSubstring < Test::Unit::TestCase
59
59
  assert_in_delta 1.0, @long.similar(@long.pattern), D
60
60
  end
61
61
  end
62
- # vim: set et sw=2 ts=2:
@@ -1,7 +1,7 @@
1
1
  require 'test/unit'
2
2
  require 'amatch'
3
3
 
4
- class TC_PairDistance < Test::Unit::TestCase
4
+ class TestPairDistance < Test::Unit::TestCase
5
5
  include Amatch
6
6
 
7
7
  D = 0.000001
@@ -84,4 +84,3 @@ class TC_PairDistance < Test::Unit::TestCase
84
84
  assert_in_delta 1.0, @long.similar(@long.pattern), D
85
85
  end
86
86
  end
87
- # vim: set et sw=2 ts=2:
@@ -2,49 +2,48 @@ require 'test/unit'
2
2
  require 'amatch'
3
3
  require 'test_levenshtein'
4
4
 
5
- class TC_Sellers < TC_Levenshtein
5
+ class TestSellers < TestLevenshtein
6
6
  include Amatch
7
-
8
- D = 0.000001
9
-
7
+
10
8
  def setup
9
+ @d = 0.000001
11
10
  @empty = Sellers.new('')
12
11
  @simple = Sellers.new('test')
13
12
  @long = Sellers.new('A' * 160)
14
13
  end
15
14
 
16
15
  def test_weights
17
- assert_in_delta 1, @simple.substitution, D
18
- assert_in_delta 1, @simple.insertion, D
19
- assert_in_delta 1, @simple.deletion, D
16
+ assert_in_delta 1, @simple.substitution, @d
17
+ assert_in_delta 1, @simple.insertion, @d
18
+ assert_in_delta 1, @simple.deletion, @d
20
19
  @simple.insertion = 1
21
20
  @simple.substitution = @simple.deletion = 1000
22
- assert_in_delta 1, @simple.match('tst'), D
23
- assert_in_delta 1, @simple.search('bbbtstccc'), D
21
+ assert_in_delta 1, @simple.match('tst'), @d
22
+ assert_in_delta 1, @simple.search('bbbtstccc'), @d
24
23
  @simple.deletion = 1
25
24
  @simple.substitution = @simple.insertion = 1000
26
- assert_in_delta 1, @simple.match('tedst'), D
27
- assert_in_delta 1, @simple.search('bbbtedstccc'), D
25
+ assert_in_delta 1, @simple.match('tedst'), @d
26
+ assert_in_delta 1, @simple.search('bbbtedstccc'), @d
28
27
  @simple.substitution = 1
29
28
  @simple.deletion = @simple.insertion = 1000
30
- assert_in_delta 1, @simple.match('tast'), D
31
- assert_in_delta 1, @simple.search('bbbtastccc'), D
29
+ assert_in_delta 1, @simple.match('tast'), @d
30
+ assert_in_delta 1, @simple.search('bbbtastccc'), @d
32
31
  @simple.insertion = 0.5
33
32
  @simple.substitution = @simple.deletion = 1000
34
- assert_in_delta 0.5, @simple.match('tst'), D
35
- assert_in_delta 0.5, @simple.search('bbbtstccc'), D
33
+ assert_in_delta 0.5, @simple.match('tst'), @d
34
+ assert_in_delta 0.5, @simple.search('bbbtstccc'), @d
36
35
  @simple.deletion = 0.5
37
36
  @simple.substitution = @simple.insertion = 1000
38
- assert_in_delta 0.5, @simple.match('tedst'), D
39
- assert_in_delta 0.5, @simple.search('bbbtedstccc'), D
37
+ assert_in_delta 0.5, @simple.match('tedst'), @d
38
+ assert_in_delta 0.5, @simple.search('bbbtedstccc'), @d
40
39
  @simple.substitution = 0.5
41
40
  @simple.deletion = @simple.insertion = 1000
42
- assert_in_delta 0.5, @simple.match('tast'), D
43
- assert_in_delta 0.5, @simple.search('bbbtastccc'), D
41
+ assert_in_delta 0.5, @simple.match('tast'), @d
42
+ assert_in_delta 0.5, @simple.search('bbbtastccc'), @d
44
43
  @simple.reset_weights
45
- assert_in_delta 1, @simple.substitution, D
46
- assert_in_delta 1, @simple.insertion, D
47
- assert_in_delta 1, @simple.deletion, D
44
+ assert_in_delta 1, @simple.substitution, @d
45
+ assert_in_delta 1, @simple.insertion, @d
46
+ assert_in_delta 1, @simple.deletion, @d
48
47
  end
49
48
 
50
49
  def test_weight_exceptions
@@ -54,45 +53,44 @@ class TC_Sellers < TC_Levenshtein
54
53
  end
55
54
 
56
55
  def test_similar
57
- assert_in_delta 0.0, @simple.similar(''), D
58
- assert_in_delta 1.0, @simple.similar('test'), D
59
- assert_in_delta 0.8, @simple.similar('testa'), D
60
- assert_in_delta 0.8, @simple.similar('atest'), D
61
- assert_in_delta 0.8, @simple.similar('teast'), D
62
- assert_in_delta 0.75, @simple.similar('est'), D
63
- assert_in_delta 0.75, @simple.similar('tes'), D
64
- assert_in_delta 0.75, @simple.similar('tst'), D
65
- assert_in_delta 0.75, @simple.similar('best'), D
66
- assert_in_delta 0.75, @simple.similar('tost'), D
67
- assert_in_delta 0.75, @simple.similar('tesa'), D
68
- assert_in_delta 0.25, @simple.similar('taex'), D
69
- assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
70
- assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), D
56
+ assert_in_delta 0.0, @simple.similar(''), @d
57
+ assert_in_delta 1.0, @simple.similar('test'), @d
58
+ assert_in_delta 0.8, @simple.similar('testa'), @d
59
+ assert_in_delta 0.8, @simple.similar('atest'), @d
60
+ assert_in_delta 0.8, @simple.similar('teast'), @d
61
+ assert_in_delta 0.75, @simple.similar('est'), @d
62
+ assert_in_delta 0.75, @simple.similar('tes'), @d
63
+ assert_in_delta 0.75, @simple.similar('tst'), @d
64
+ assert_in_delta 0.75, @simple.similar('best'), @d
65
+ assert_in_delta 0.75, @simple.similar('tost'), @d
66
+ assert_in_delta 0.75, @simple.similar('tesa'), @d
67
+ assert_in_delta 0.25, @simple.similar('taex'), @d
68
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
69
+ assert_in_delta 0.75, @simple.pattern.levenshtein_similar('est'), @d
71
70
  end
72
71
 
73
72
  def test_similar
74
- assert_in_delta 1, @empty.similar(''), D
75
- assert_in_delta 0, @empty.similar('not empty'), D
76
- assert_in_delta 0.0, @simple.similar(''), D
77
- assert_in_delta 1.0, @simple.similar('test'), D
78
- assert_in_delta 0.8, @simple.similar('testa'), D
79
- assert_in_delta 0.8, @simple.similar('atest'), D
80
- assert_in_delta 0.8, @simple.similar('teast'), D
81
- assert_in_delta 0.75, @simple.similar('est'), D
82
- assert_in_delta 0.75, @simple.similar('tes'), D
83
- assert_in_delta 0.75, @simple.similar('tst'), D
84
- assert_in_delta 0.75, @simple.similar('best'), D
85
- assert_in_delta 0.75, @simple.similar('tost'), D
86
- assert_in_delta 0.75, @simple.similar('tesa'), D
87
- assert_in_delta 0.25, @simple.similar('taex'), D
88
- assert_in_delta 0.4, @simple.similar('aaatestbbb'), D
73
+ assert_in_delta 1, @empty.similar(''), @d
74
+ assert_in_delta 0, @empty.similar('not empty'), @d
75
+ assert_in_delta 0.0, @simple.similar(''), @d
76
+ assert_in_delta 1.0, @simple.similar('test'), @d
77
+ assert_in_delta 0.8, @simple.similar('testa'), @d
78
+ assert_in_delta 0.8, @simple.similar('atest'), @d
79
+ assert_in_delta 0.8, @simple.similar('teast'), @d
80
+ assert_in_delta 0.75, @simple.similar('est'), @d
81
+ assert_in_delta 0.75, @simple.similar('tes'), @d
82
+ assert_in_delta 0.75, @simple.similar('tst'), @d
83
+ assert_in_delta 0.75, @simple.similar('best'), @d
84
+ assert_in_delta 0.75, @simple.similar('tost'), @d
85
+ assert_in_delta 0.75, @simple.similar('tesa'), @d
86
+ assert_in_delta 0.25, @simple.similar('taex'), @d
87
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
89
88
  @simple.insertion = 1
90
89
  @simple.substitution = @simple.deletion = 2
91
- assert_in_delta 0.875, @simple.similar('tst'), D
90
+ assert_in_delta 0.875, @simple.similar('tst'), @d
92
91
  end
93
92
 
94
93
  def test_long
95
- assert_in_delta 1.0, @long.similar(@long.pattern), D
94
+ assert_in_delta 1.0, @long.similar(@long.pattern), @d
96
95
  end
97
96
  end
98
- # vim: set et sw=2 ts=2:
metadata CHANGED
@@ -1,71 +1,92 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
3
- specification_version: 1
4
2
  name: amatch
5
3
  version: !ruby/object:Gem::Version
6
- version: 0.2.3
7
- date: 2006-09-26 00:00:00 +02:00
8
- summary: Approximate String Matching library
9
- require_paths:
10
- - ext
11
- email: flori@ping.de
12
- homepage: http://amatch.rubyforge.org
13
- rubyforge_project: amatch
14
- description: Amatch is a library for approximate string matching and searching in strings. Several algorithms can be used to do this, and it's also possible to compute a similarity metric number between 0.0 and 1.0 for two given strings.
15
- autorequire: amatch
16
- default_executable: agrep.rb
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 0.2.4
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
6
  authors:
29
7
  - Florian Frank
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-26 00:00:00 +02:00
13
+ default_executable: agrep.rb
14
+ dependencies: []
15
+
16
+ description: |
17
+ Amatch is a library for approximate string matching and searching in strings.
18
+ Several algorithms can be used to do this, and it's also possible to compute a
19
+ similarity metric number between 0.0 and 1.0 for two given strings.
20
+
21
+ email: flori@ping.de
22
+ executables:
23
+ - agrep.rb
24
+ extensions:
25
+ - ext/extconf.rb
26
+ extra_rdoc_files:
27
+ - ext/amatch.c
28
+ - lib/amatch/version.rb
29
+ - doc-main.txt
30
30
  files:
31
- - bin
32
- - VERSION
33
- - tests
34
- - GPL
35
- - README.en
36
- - install.rb
37
- - ext
38
- - Rakefile
39
31
  - CHANGES
40
32
  - bin/agrep.rb
41
- - tests/runner.rb
33
+ - VERSION
34
+ - README
35
+ - ext/common.h
36
+ - ext/extconf.rb
37
+ - ext/amatch.c
38
+ - ext/pair.h
39
+ - ext/pair.c
40
+ - Rakefile
41
+ - lib/amatch/version.rb
42
42
  - tests/test_longest_substring.rb
43
- - tests/test_levenshtein.rb
44
43
  - tests/test_hamming.rb
45
- - tests/test_sellers.rb
46
- - tests/test_pair_distance.rb
47
44
  - tests/test_longest_subsequence.rb
48
- - ext/tags
49
- - ext/pair.h
50
- - ext/extconf.rb
51
- - ext/MANIFEST
52
- - ext/pair.c
53
- - ext/amatch.c
54
- test_files:
55
- - tests/runner.rb
45
+ - tests/test_pair_distance.rb
46
+ - tests/test_levenshtein.rb
47
+ - tests/test_jaro.rb
48
+ - tests/test_sellers.rb
49
+ - tests/test_jaro_winkler.rb
50
+ - COPYING
51
+ - install.rb
52
+ - doc-main.txt
53
+ has_rdoc: true
54
+ homepage: http://amatch.rubyforge.org
55
+ licenses: []
56
+
57
+ post_install_message:
56
58
  rdoc_options:
57
- - --title
58
- - Amatch -- Approximate Matching
59
59
  - --main
60
- - Amatch
61
- - --line-numbers
62
- extra_rdoc_files: []
63
-
64
- executables:
65
- - agrep.rb
66
- extensions:
67
- - ext/extconf.rb
60
+ - doc-main.txt
61
+ - --title
62
+ - amatch - Approximate Matching
63
+ require_paths:
64
+ - ext
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: "0"
70
+ version:
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: "0"
76
+ version:
68
77
  requirements: []
69
78
 
70
- dependencies: []
71
-
79
+ rubyforge_project: amatch
80
+ rubygems_version: 1.3.4
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: Approximate String Matching library
84
+ test_files:
85
+ - tests/test_longest_substring.rb
86
+ - tests/test_hamming.rb
87
+ - tests/test_longest_subsequence.rb
88
+ - tests/test_pair_distance.rb
89
+ - tests/test_levenshtein.rb
90
+ - tests/test_jaro.rb
91
+ - tests/test_sellers.rb
92
+ - tests/test_jaro_winkler.rb