amatch 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Binary file
@@ -1,6 +1,6 @@
1
1
  module Amatch
2
2
  # Amatch version
3
- VERSION = '0.3.1'
3
+ VERSION = '0.4.0'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
@@ -0,0 +1,93 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TestDamerauLevenshtein < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ def setup
8
+ @d = 0.000001
9
+ @empty = DamerauLevenshtein.new('')
10
+ @simple = DamerauLevenshtein.new('test')
11
+ @long = DamerauLevenshtein.new('A' * 160)
12
+ end
13
+
14
+ def test_version
15
+ assert_kind_of String, Amatch::VERSION
16
+ end
17
+
18
+ def test_match
19
+ assert_equal 4, @simple.match('')
20
+ assert_equal 0, @simple.match('test')
21
+ assert_equal 1, @simple.match('testa')
22
+ assert_equal 1, @simple.match('atest')
23
+ assert_equal 1, @simple.match('teast')
24
+ assert_equal 1, @simple.match('est')
25
+ assert_equal 1, @simple.match('tes')
26
+ assert_equal 1, @simple.match('tst')
27
+ assert_equal 1, @simple.match('best')
28
+ assert_equal 1, @simple.match('tost')
29
+ assert_equal 1, @simple.match('tesa')
30
+ assert_equal 3, @simple.match('taex')
31
+ assert_equal 6, @simple.match('aaatestbbb')
32
+ assert_equal 1, @simple.match('tset')
33
+ end
34
+
35
+ def test_search
36
+ assert_equal 4, @simple.search('')
37
+ assert_equal 0, @empty.search('')
38
+ assert_equal 0, @empty.search('test')
39
+ assert_equal 0, @simple.search('aaatestbbb')
40
+ assert_equal 3, @simple.search('aaataexbbb')
41
+ assert_equal 4, @simple.search('aaaaaaaaa')
42
+ end
43
+
44
+ def test_array_result
45
+ assert_equal [1, 0], @simple.match(["tets", "test"])
46
+ assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
47
+ assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
48
+ end
49
+
50
+ def test_pattern_setting
51
+ assert_raises(TypeError) { @simple.pattern = :something }
52
+ assert_equal 0, @simple.match('test')
53
+ @simple.pattern = ''
54
+ assert_equal 4, @simple.match('test')
55
+ @simple.pattern = 'test'
56
+ assert_equal 0, @simple.match('test')
57
+ end
58
+
59
+ def test_similar
60
+ assert_in_delta 1, @empty.similar(''), @d
61
+ assert_in_delta 0, @empty.similar('not empty'), @d
62
+ assert_in_delta 0.0, @simple.similar(''), @d
63
+ assert_in_delta 1.0, @simple.similar('test'), @d
64
+ assert_in_delta 0.8, @simple.similar('testa'), @d
65
+ assert_in_delta 0.8, @simple.similar('atest'), @d
66
+ assert_in_delta 0.8, @simple.similar('teast'), @d
67
+ assert_in_delta 0.75, @simple.similar('est'), @d
68
+ assert_in_delta 0.75, @simple.similar('tes'), @d
69
+ assert_in_delta 0.75, @simple.similar('tst'), @d
70
+ assert_in_delta 0.75, @simple.similar('best'), @d
71
+ assert_in_delta 0.75, @simple.similar('tost'), @d
72
+ assert_in_delta 0.75, @simple.similar('tesa'), @d
73
+ assert_in_delta 0.25, @simple.similar('taex'), @d
74
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
75
+ assert_in_delta 0.75, @simple.pattern.damerau_levenshtein_similar('est'), @d
76
+ end
77
+
78
+ def test_transpositions
79
+ assert_in_delta 1.0, 'atestatest'.damerau_levenshtein_similar('atestatest'), @d
80
+ assert_in_delta 0.9, 'atestatest'.damerau_levenshtein_similar('atetsatest'), @d
81
+ assert_in_delta 0.8, 'atestatest'.damerau_levenshtein_similar('atetsatset'), @d
82
+ end
83
+
84
+ def test_long
85
+ assert_in_delta 1.0, @long.similar(@long.pattern), @d
86
+ end
87
+
88
+ def test_long2
89
+ a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
90
+ b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
91
+ assert a.damerau_levenshtein_similar(b)
92
+ end
93
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-23 00:00:00.000000000 Z
11
+ date: 2017-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gem_hadar
@@ -52,17 +52,32 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  description: |
56
70
  Amatch is a library for approximate string matching and searching in strings.
57
71
  Several algorithms can be used to do this, and it's also possible to compute a
58
72
  similarity metric number between 0.0 and 1.0 for two given strings.
59
73
  email: flori@ping.de
60
74
  executables:
61
- - agrep.rb
75
+ - agrep
76
+ - dupfind
62
77
  extensions:
63
78
  - ext/extconf.rb
64
79
  extra_rdoc_files:
65
- - README.rdoc
80
+ - README.md
66
81
  - lib/amatch.rb
67
82
  - lib/amatch/polite.rb
68
83
  - lib/amatch/rude.rb
@@ -75,22 +90,25 @@ files:
75
90
  - CHANGES
76
91
  - COPYING
77
92
  - Gemfile
78
- - README.rdoc
93
+ - README.md
79
94
  - Rakefile
80
95
  - VERSION
81
96
  - amatch.gemspec
82
- - bin/agrep.rb
97
+ - bin/agrep
98
+ - bin/dupfind
83
99
  - ext/amatch_ext.c
84
100
  - ext/common.h
85
101
  - ext/extconf.rb
86
102
  - ext/pair.c
87
103
  - ext/pair.h
104
+ - images/amatch_ext.png
88
105
  - install.rb
89
106
  - lib/amatch.rb
90
107
  - lib/amatch/.keep
91
108
  - lib/amatch/polite.rb
92
109
  - lib/amatch/rude.rb
93
110
  - lib/amatch/version.rb
111
+ - tests/test_damerau_levenshtein.rb
94
112
  - tests/test_hamming.rb
95
113
  - tests/test_jaro.rb
96
114
  - tests/test_jaro_winkler.rb
@@ -101,14 +119,14 @@ files:
101
119
  - tests/test_sellers.rb
102
120
  homepage: http://github.com/flori/amatch
103
121
  licenses:
104
- - GPL
122
+ - Apache-2.0
105
123
  metadata: {}
106
124
  post_install_message:
107
125
  rdoc_options:
108
126
  - "--title"
109
127
  - Amatch - Approximate Matching
110
128
  - "--main"
111
- - README.rdoc
129
+ - README.md
112
130
  require_paths:
113
131
  - lib
114
132
  - ext
@@ -129,6 +147,7 @@ signing_key:
129
147
  specification_version: 4
130
148
  summary: Approximate String Matching library
131
149
  test_files:
150
+ - tests/test_damerau_levenshtein.rb
132
151
  - tests/test_hamming.rb
133
152
  - tests/test_jaro.rb
134
153
  - tests/test_jaro_winkler.rb
@@ -1,128 +0,0 @@
1
- = amatch - Approximate Matching Extension for Ruby
2
-
3
- == Description
4
-
5
- This is a collection of classes that can be used for Approximate
6
- matching, searching, and comparing of Strings. They implement algorithms
7
- that compute the Levenshtein edit distance, Sellers edit distance, the
8
- Hamming distance, the longest common subsequence length, the longest common
9
- substring length, the pair distance metric, and the Jaro-Winkler metric.
10
-
11
- == Download
12
-
13
- The latest version of <b>amatch</b> can be found at
14
-
15
- * http://rubyforge.org/frs/?group_id=390
16
-
17
- Online Documentation should be located at
18
-
19
- * http://amatch.rubyforge.org
20
-
21
- == Installation
22
-
23
- Just type into the command line as root:
24
-
25
- # ruby install.rb
26
-
27
- If you have installed rake (rake.rubyforge.org), you can also type:
28
-
29
- # rake install
30
-
31
- To install this extension as a gem, type:
32
-
33
- # gem install amatch
34
-
35
- == Examples
36
- require 'amatch'
37
- # => true
38
- include Amatch
39
- # => Object
40
-
41
- m = Sellers.new("pattern")
42
- # => #<Amatch::Sellers:0x40366324>
43
- m.match("pattren")
44
- # => 2.0
45
- m.substitution = m.insertion = 3
46
- # => 3
47
- m.match("pattren")
48
- # => 4.0
49
- m.reset_weights
50
- # => #<Amatch::Sellers:0x40366324>
51
- m.match(["pattren","parent"])
52
- # => [2.0, 4.0]
53
- m.search("abcpattrendef")
54
- # => 2.0
55
-
56
- m = Levenshtein.new("pattern")
57
- # => #<Amatch::Levenshtein:0x4035919c>
58
- m.match("pattren")
59
- # => 2
60
- m.search("abcpattrendef")
61
- # => 2
62
- "pattern language".levenshtein_similar("language of patterns")
63
- # => 0.2
64
-
65
- m = Hamming.new("pattern")
66
- # => #<Amatch::Hamming:0x40350858>
67
- m.match("pattren")
68
- # => 2
69
- "pattern language".hamming_similar("language of patterns")
70
- # => 0.1
71
-
72
- m = PairDistance.new("pattern")
73
- # => #<Amatch::PairDistance:0x40349be8>
74
- m.match("pattr en")
75
- # => 0.545454545454545
76
- m.match("pattr en", nil)
77
- # => 0.461538461538462
78
- m.match("pattr en", /t+/)
79
- # => 0.285714285714286
80
- "pattern language".pair_distance_similar("language of patterns")
81
- # => 0.928571428571429
82
-
83
- m = LongestSubsequence.new("pattern")
84
- # => #<Amatch::LongestSubsequence:0x4033e900>
85
- m.match("pattren")
86
- # => 6
87
- "pattern language".longest_subsequence_similar("language of patterns")
88
- # => 0.4
89
-
90
- m = LongestSubstring.new("pattern")
91
- # => #<Amatch::LongestSubstring:0x403378d0>
92
- m.match("pattren")
93
- # => 4
94
- "pattern language".longest_substring_similar("language of patterns")
95
- # => 0.4
96
-
97
- m = Jaro.new("pattern")
98
- # => #<Amatch::Jaro:0x363b70>
99
- m.match("paTTren")
100
- # => 0.952380952380952
101
- m.ignore_case = false
102
- m.match("paTTren")
103
- # => 0.742857142857143
104
- "pattern language".jaro_similar("language of patterns")
105
- # => 0.672222222222222
106
-
107
- m = JaroWinkler.new("pattern")
108
- # => #<Amatch::JaroWinkler:0x3530b8>
109
- m.match("paTTren")
110
- # => 0.971428571712403
111
- m.ignore_case = false
112
- m.match("paTTren")
113
- # => 0.79428571505206
114
- m.scaling_factor = 0.05
115
- m.match("pattren")
116
- # => 0.961904762046678
117
- "pattern language".jarowinkler_similar("language of patterns")
118
- # => 0.672222222222222
119
-
120
- == Author
121
-
122
- Florian Frank mailto:flori@ping.de
123
-
124
- == License
125
-
126
- This is free software; you can redistribute it and/or modify it under
127
- the terms of the GNU General Public License Version 2 as published by
128
- the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html