amatch 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -1,6 +1,6 @@
1
1
  module Amatch
2
2
  # Amatch version
3
- VERSION = '0.3.1'
3
+ VERSION = '0.4.0'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
@@ -0,0 +1,93 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TestDamerauLevenshtein < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ def setup
8
+ @d = 0.000001
9
+ @empty = DamerauLevenshtein.new('')
10
+ @simple = DamerauLevenshtein.new('test')
11
+ @long = DamerauLevenshtein.new('A' * 160)
12
+ end
13
+
14
+ def test_version
15
+ assert_kind_of String, Amatch::VERSION
16
+ end
17
+
18
+ def test_match
19
+ assert_equal 4, @simple.match('')
20
+ assert_equal 0, @simple.match('test')
21
+ assert_equal 1, @simple.match('testa')
22
+ assert_equal 1, @simple.match('atest')
23
+ assert_equal 1, @simple.match('teast')
24
+ assert_equal 1, @simple.match('est')
25
+ assert_equal 1, @simple.match('tes')
26
+ assert_equal 1, @simple.match('tst')
27
+ assert_equal 1, @simple.match('best')
28
+ assert_equal 1, @simple.match('tost')
29
+ assert_equal 1, @simple.match('tesa')
30
+ assert_equal 3, @simple.match('taex')
31
+ assert_equal 6, @simple.match('aaatestbbb')
32
+ assert_equal 1, @simple.match('tset')
33
+ end
34
+
35
+ def test_search
36
+ assert_equal 4, @simple.search('')
37
+ assert_equal 0, @empty.search('')
38
+ assert_equal 0, @empty.search('test')
39
+ assert_equal 0, @simple.search('aaatestbbb')
40
+ assert_equal 3, @simple.search('aaataexbbb')
41
+ assert_equal 4, @simple.search('aaaaaaaaa')
42
+ end
43
+
44
+ def test_array_result
45
+ assert_equal [1, 0], @simple.match(["tets", "test"])
46
+ assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
47
+ assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
48
+ end
49
+
50
+ def test_pattern_setting
51
+ assert_raises(TypeError) { @simple.pattern = :something }
52
+ assert_equal 0, @simple.match('test')
53
+ @simple.pattern = ''
54
+ assert_equal 4, @simple.match('test')
55
+ @simple.pattern = 'test'
56
+ assert_equal 0, @simple.match('test')
57
+ end
58
+
59
+ def test_similar
60
+ assert_in_delta 1, @empty.similar(''), @d
61
+ assert_in_delta 0, @empty.similar('not empty'), @d
62
+ assert_in_delta 0.0, @simple.similar(''), @d
63
+ assert_in_delta 1.0, @simple.similar('test'), @d
64
+ assert_in_delta 0.8, @simple.similar('testa'), @d
65
+ assert_in_delta 0.8, @simple.similar('atest'), @d
66
+ assert_in_delta 0.8, @simple.similar('teast'), @d
67
+ assert_in_delta 0.75, @simple.similar('est'), @d
68
+ assert_in_delta 0.75, @simple.similar('tes'), @d
69
+ assert_in_delta 0.75, @simple.similar('tst'), @d
70
+ assert_in_delta 0.75, @simple.similar('best'), @d
71
+ assert_in_delta 0.75, @simple.similar('tost'), @d
72
+ assert_in_delta 0.75, @simple.similar('tesa'), @d
73
+ assert_in_delta 0.25, @simple.similar('taex'), @d
74
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
75
+ assert_in_delta 0.75, @simple.pattern.damerau_levenshtein_similar('est'), @d
76
+ end
77
+
78
+ def test_transpositions
79
+ assert_in_delta 1.0, 'atestatest'.damerau_levenshtein_similar('atestatest'), @d
80
+ assert_in_delta 0.9, 'atestatest'.damerau_levenshtein_similar('atetsatest'), @d
81
+ assert_in_delta 0.8, 'atestatest'.damerau_levenshtein_similar('atetsatset'), @d
82
+ end
83
+
84
+ def test_long
85
+ assert_in_delta 1.0, @long.similar(@long.pattern), @d
86
+ end
87
+
88
+ def test_long2
89
+ a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
90
+ b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
91
+ assert a.damerau_levenshtein_similar(b)
92
+ end
93
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-23 00:00:00.000000000 Z
11
+ date: 2017-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gem_hadar
@@ -52,17 +52,32 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  description: |
56
70
  Amatch is a library for approximate string matching and searching in strings.
57
71
  Several algorithms can be used to do this, and it's also possible to compute a
58
72
  similarity metric number between 0.0 and 1.0 for two given strings.
59
73
  email: flori@ping.de
60
74
  executables:
61
- - agrep.rb
75
+ - agrep
76
+ - dupfind
62
77
  extensions:
63
78
  - ext/extconf.rb
64
79
  extra_rdoc_files:
65
- - README.rdoc
80
+ - README.md
66
81
  - lib/amatch.rb
67
82
  - lib/amatch/polite.rb
68
83
  - lib/amatch/rude.rb
@@ -75,22 +90,25 @@ files:
75
90
  - CHANGES
76
91
  - COPYING
77
92
  - Gemfile
78
- - README.rdoc
93
+ - README.md
79
94
  - Rakefile
80
95
  - VERSION
81
96
  - amatch.gemspec
82
- - bin/agrep.rb
97
+ - bin/agrep
98
+ - bin/dupfind
83
99
  - ext/amatch_ext.c
84
100
  - ext/common.h
85
101
  - ext/extconf.rb
86
102
  - ext/pair.c
87
103
  - ext/pair.h
104
+ - images/amatch_ext.png
88
105
  - install.rb
89
106
  - lib/amatch.rb
90
107
  - lib/amatch/.keep
91
108
  - lib/amatch/polite.rb
92
109
  - lib/amatch/rude.rb
93
110
  - lib/amatch/version.rb
111
+ - tests/test_damerau_levenshtein.rb
94
112
  - tests/test_hamming.rb
95
113
  - tests/test_jaro.rb
96
114
  - tests/test_jaro_winkler.rb
@@ -101,14 +119,14 @@ files:
101
119
  - tests/test_sellers.rb
102
120
  homepage: http://github.com/flori/amatch
103
121
  licenses:
104
- - GPL
122
+ - Apache-2.0
105
123
  metadata: {}
106
124
  post_install_message:
107
125
  rdoc_options:
108
126
  - "--title"
109
127
  - Amatch - Approximate Matching
110
128
  - "--main"
111
- - README.rdoc
129
+ - README.md
112
130
  require_paths:
113
131
  - lib
114
132
  - ext
@@ -129,6 +147,7 @@ signing_key:
129
147
  specification_version: 4
130
148
  summary: Approximate String Matching library
131
149
  test_files:
150
+ - tests/test_damerau_levenshtein.rb
132
151
  - tests/test_hamming.rb
133
152
  - tests/test_jaro.rb
134
153
  - tests/test_jaro_winkler.rb
@@ -1,128 +0,0 @@
1
- = amatch - Approximate Matching Extension for Ruby
2
-
3
- == Description
4
-
5
- This is a collection of classes that can be used for Approximate
6
- matching, searching, and comparing of Strings. They implement algorithms
7
- that compute the Levenshtein edit distance, Sellers edit distance, the
8
- Hamming distance, the longest common subsequence length, the longest common
9
- substring length, the pair distance metric, the Jaro-Winkler metric.
10
-
11
- == Download
12
-
13
- The latest version of <b>amatch</b> can be found at
14
-
15
- * http://rubyforge.org/frs/?group_id=390
16
-
17
- Online Documentation should be located at
18
-
19
- * http://amatch.rubyforge.org
20
-
21
- == Installation
22
-
23
- Just type into the command line as root:
24
-
25
- # ruby install.rb
26
-
27
- If you have installed rake (rake.rubyforge.org), you can also type:
28
-
29
- # rake install
30
-
31
- To install this extension as a gem type
32
-
33
- # gem install amatch
34
-
35
- == Examples
36
- require 'amatch'
37
- # => true
38
- include Amatch
39
- # => Object
40
-
41
- m = Sellers.new("pattern")
42
- # => #<Amatch::Sellers:0x40366324>
43
- m.match("pattren")
44
- # => 2.0
45
- m.substitution = m.insertion = 3
46
- # => 3
47
- m.match("pattren")
48
- # => 4.0
49
- m.reset_weights
50
- # => #<Amatch::Sellers:0x40366324>
51
- m.match(["pattren","parent"])
52
- # => [2.0, 4.0]
53
- m.search("abcpattrendef")
54
- # => 2.0
55
-
56
- m = Levenshtein.new("pattern")
57
- # => #<Amatch::Levenshtein:0x4035919c>
58
- m.match("pattren")
59
- # => 2
60
- m.search("abcpattrendef")
61
- # => 2
62
- "pattern language".levenshtein_similar("language of patterns")
63
- # => 0.2
64
-
65
- m = Hamming.new("pattern")
66
- # => #<Amatch::Hamming:0x40350858>
67
- m.match("pattren")
68
- # => 2
69
- "pattern language".hamming_similar("language of patterns")
70
- # => 0.1
71
-
72
- m = PairDistance.new("pattern")
73
- # => #<Amatch::PairDistance:0x40349be8>
74
- m.match("pattr en")
75
- # => 0.545454545454545
76
- m.match("pattr en", nil)
77
- # => 0.461538461538462
78
- m.match("pattr en", /t+/)
79
- # => 0.285714285714286
80
- "pattern language".pair_distance_similar("language of patterns")
81
- # => 0.928571428571429
82
-
83
- m = LongestSubsequence.new("pattern")
84
- # => #<Amatch::LongestSubsequence:0x4033e900>
85
- m.match("pattren")
86
- # => 6
87
- "pattern language".longest_subsequence_similar("language of patterns")
88
- # => 0.4
89
-
90
- m = LongestSubstring.new("pattern")
91
- # => #<Amatch::LongestSubstring:0x403378d0>
92
- m.match("pattren")
93
- # => 4
94
- "pattern language".longest_substring_similar("language of patterns")
95
- # => 0.4
96
-
97
- m = Jaro.new("pattern")
98
- # => #<Amatch::Jaro:0x363b70>
99
- m.match("paTTren")
100
- # => 0.952380952380952
101
- m.ignore_case = false
102
- m.match("paTTren")
103
- # => 0.742857142857143
104
- "pattern language".jaro_similar("language of patterns")
105
- # => 0.672222222222222
106
-
107
- m = JaroWinkler.new("pattern")
108
- # #<Amatch::JaroWinkler:0x3530b8>
109
- m.match("paTTren")
110
- # => 0.971428571712403
111
- m.ignore_case = false
112
- m.match("paTTren")
113
- # => 0.79428571505206
114
- m.scaling_factor = 0.05
115
- m.match("pattren")
116
- # => 0.961904762046678
117
- "pattern language".jarowinkler_similar("language of patterns")
118
- # => 0.672222222222222
119
-
120
- == Author
121
-
122
- Florian Frank mailto:flori@ping.de
123
-
124
- == License
125
-
126
- This is free software; you can redistribute it and/or modify it under
127
- the terms of the GNU General Public License Version 2 as published by
128
- the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html