amatch 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/pair.c CHANGED
@@ -72,6 +72,8 @@ void pair_print(Pair pair)
72
72
 
73
73
  void pair_array_destroy(PairArray *pair_array)
74
74
  {
75
- xfree(pair_array->pairs);
75
+ if (pair_array->pairs) {
76
+ xfree(pair_array->pairs);
77
+ }
76
78
  xfree(pair_array);
77
79
  }
Binary file
data/lib/amatch/version.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module Amatch
2
2
  # Amatch version
3
- VERSION = '0.3.0'
3
+ VERSION = '0.4.1'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
data/tests/test_damerau_levenshtein.rb ADDED
@@ -0,0 +1,93 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TestDamerauLevenshtein < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ def setup
8
+ @d = 0.000001
9
+ @empty = DamerauLevenshtein.new('')
10
+ @simple = DamerauLevenshtein.new('test')
11
+ @long = DamerauLevenshtein.new('A' * 160)
12
+ end
13
+
14
+ def test_version
15
+ assert_kind_of String, Amatch::VERSION
16
+ end
17
+
18
+ def test_match
19
+ assert_equal 4, @simple.match('')
20
+ assert_equal 0, @simple.match('test')
21
+ assert_equal 1, @simple.match('testa')
22
+ assert_equal 1, @simple.match('atest')
23
+ assert_equal 1, @simple.match('teast')
24
+ assert_equal 1, @simple.match('est')
25
+ assert_equal 1, @simple.match('tes')
26
+ assert_equal 1, @simple.match('tst')
27
+ assert_equal 1, @simple.match('best')
28
+ assert_equal 1, @simple.match('tost')
29
+ assert_equal 1, @simple.match('tesa')
30
+ assert_equal 3, @simple.match('taex')
31
+ assert_equal 6, @simple.match('aaatestbbb')
32
+ assert_equal 1, @simple.match('tset')
33
+ end
34
+
35
+ def test_search
36
+ assert_equal 4, @simple.search('')
37
+ assert_equal 0, @empty.search('')
38
+ assert_equal 0, @empty.search('test')
39
+ assert_equal 0, @simple.search('aaatestbbb')
40
+ assert_equal 3, @simple.search('aaataexbbb')
41
+ assert_equal 4, @simple.search('aaaaaaaaa')
42
+ end
43
+
44
+ def test_array_result
45
+ assert_equal [1, 0], @simple.match(["tets", "test"])
46
+ assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
47
+ assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
48
+ end
49
+
50
+ def test_pattern_setting
51
+ assert_raises(TypeError) { @simple.pattern = :something }
52
+ assert_equal 0, @simple.match('test')
53
+ @simple.pattern = ''
54
+ assert_equal 4, @simple.match('test')
55
+ @simple.pattern = 'test'
56
+ assert_equal 0, @simple.match('test')
57
+ end
58
+
59
+ def test_similar
60
+ assert_in_delta 1, @empty.similar(''), @d
61
+ assert_in_delta 0, @empty.similar('not empty'), @d
62
+ assert_in_delta 0.0, @simple.similar(''), @d
63
+ assert_in_delta 1.0, @simple.similar('test'), @d
64
+ assert_in_delta 0.8, @simple.similar('testa'), @d
65
+ assert_in_delta 0.8, @simple.similar('atest'), @d
66
+ assert_in_delta 0.8, @simple.similar('teast'), @d
67
+ assert_in_delta 0.75, @simple.similar('est'), @d
68
+ assert_in_delta 0.75, @simple.similar('tes'), @d
69
+ assert_in_delta 0.75, @simple.similar('tst'), @d
70
+ assert_in_delta 0.75, @simple.similar('best'), @d
71
+ assert_in_delta 0.75, @simple.similar('tost'), @d
72
+ assert_in_delta 0.75, @simple.similar('tesa'), @d
73
+ assert_in_delta 0.25, @simple.similar('taex'), @d
74
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
75
+ assert_in_delta 0.75, @simple.pattern.damerau_levenshtein_similar('est'), @d
76
+ end
77
+
78
+ def test_transpositions
79
+ assert_in_delta 1.0, 'atestatest'.damerau_levenshtein_similar('atestatest'), @d
80
+ assert_in_delta 0.9, 'atestatest'.damerau_levenshtein_similar('atetsatest'), @d
81
+ assert_in_delta 0.8, 'atestatest'.damerau_levenshtein_similar('atetsatset'), @d
82
+ end
83
+
84
+ def test_long
85
+ assert_in_delta 1.0, @long.similar(@long.pattern), @d
86
+ end
87
+
88
+ def test_long2
89
+ a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
90
+ b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
91
+ assert a.damerau_levenshtein_similar(b)
92
+ end
93
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-27 00:00:00.000000000 Z
11
+ date: 2022-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gem_hadar
@@ -16,30 +16,30 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.0.0
19
+ version: 1.12.0
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.0.0
26
+ version: 1.12.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: test-unit
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.3'
33
+ version: '3.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.3'
40
+ version: '3.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: utils
42
+ name: all_images
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -53,50 +53,45 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: rake
56
+ name: tins
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '10'
62
- - - "<"
63
- - !ruby/object:Gem::Version
64
- version: '11.0'
65
- type: :development
61
+ version: '1.0'
62
+ type: :runtime
66
63
  prerelease: false
67
64
  version_requirements: !ruby/object:Gem::Requirement
68
65
  requirements:
69
66
  - - "~>"
70
67
  - !ruby/object:Gem::Version
71
- version: '10'
72
- - - "<"
73
- - !ruby/object:Gem::Version
74
- version: '11.0'
68
+ version: '1.0'
75
69
  - !ruby/object:Gem::Dependency
76
- name: tins
70
+ name: mize
77
71
  requirement: !ruby/object:Gem::Requirement
78
72
  requirements:
79
- - - "~>"
73
+ - - ">="
80
74
  - !ruby/object:Gem::Version
81
- version: '1.0'
75
+ version: '0'
82
76
  type: :runtime
83
77
  prerelease: false
84
78
  version_requirements: !ruby/object:Gem::Requirement
85
79
  requirements:
86
- - - "~>"
80
+ - - ">="
87
81
  - !ruby/object:Gem::Version
88
- version: '1.0'
82
+ version: '0'
89
83
  description: |
90
84
  Amatch is a library for approximate string matching and searching in strings.
91
85
  Several algorithms can be used to do this, and it's also possible to compute a
92
86
  similarity metric number between 0.0 and 1.0 for two given strings.
93
87
  email: flori@ping.de
94
88
  executables:
95
- - agrep.rb
89
+ - agrep
90
+ - dupfind
96
91
  extensions:
97
92
  - ext/extconf.rb
98
93
  extra_rdoc_files:
99
- - README.rdoc
94
+ - README.md
100
95
  - lib/amatch.rb
101
96
  - lib/amatch/polite.rb
102
97
  - lib/amatch/rude.rb
@@ -104,27 +99,27 @@ extra_rdoc_files:
104
99
  - ext/amatch_ext.c
105
100
  - ext/pair.c
106
101
  files:
107
- - ".gitignore"
108
- - ".travis.yml"
109
102
  - CHANGES
110
103
  - COPYING
111
104
  - Gemfile
112
- - README.rdoc
105
+ - README.md
113
106
  - Rakefile
114
- - VERSION
115
107
  - amatch.gemspec
116
- - bin/agrep.rb
108
+ - bin/agrep
109
+ - bin/dupfind
117
110
  - ext/amatch_ext.c
118
111
  - ext/common.h
119
112
  - ext/extconf.rb
120
113
  - ext/pair.c
121
114
  - ext/pair.h
115
+ - images/amatch_ext.png
122
116
  - install.rb
123
117
  - lib/amatch.rb
124
118
  - lib/amatch/.keep
125
119
  - lib/amatch/polite.rb
126
120
  - lib/amatch/rude.rb
127
121
  - lib/amatch/version.rb
122
+ - tests/test_damerau_levenshtein.rb
128
123
  - tests/test_hamming.rb
129
124
  - tests/test_jaro.rb
130
125
  - tests/test_jaro_winkler.rb
@@ -134,14 +129,15 @@ files:
134
129
  - tests/test_pair_distance.rb
135
130
  - tests/test_sellers.rb
136
131
  homepage: http://github.com/flori/amatch
137
- licenses: []
132
+ licenses:
133
+ - Apache-2.0
138
134
  metadata: {}
139
- post_install_message:
135
+ post_install_message:
140
136
  rdoc_options:
141
137
  - "--title"
142
138
  - Amatch - Approximate Matching
143
139
  - "--main"
144
- - README.rdoc
140
+ - README.md
145
141
  require_paths:
146
142
  - lib
147
143
  - ext
@@ -149,19 +145,19 @@ required_ruby_version: !ruby/object:Gem::Requirement
149
145
  requirements:
150
146
  - - ">="
151
147
  - !ruby/object:Gem::Version
152
- version: '0'
148
+ version: '2.4'
153
149
  required_rubygems_version: !ruby/object:Gem::Requirement
154
150
  requirements:
155
151
  - - ">="
156
152
  - !ruby/object:Gem::Version
157
153
  version: '0'
158
154
  requirements: []
159
- rubyforge_project:
160
- rubygems_version: 2.2.2
161
- signing_key:
155
+ rubygems_version: 3.3.13
156
+ signing_key:
162
157
  specification_version: 4
163
158
  summary: Approximate String Matching library
164
159
  test_files:
160
+ - tests/test_damerau_levenshtein.rb
165
161
  - tests/test_hamming.rb
166
162
  - tests/test_jaro.rb
167
163
  - tests/test_jaro_winkler.rb
data/.gitignore DELETED
@@ -1,6 +0,0 @@
1
- .*.sw[pon]
2
- .AppleDouble
3
- .bundle
4
- .rbx
5
- Gemfile.lock
6
- pkg
data/.travis.yml DELETED
@@ -1,10 +0,0 @@
1
- rvm:
2
- - 2.0.0
3
- - 2.1.0
4
- - ruby-head
5
- - rbx-18mode
6
- - rbx-19mode
7
- matrix:
8
- allow_failures:
9
- - rvm: rbx-18mode
10
- - rvm: rbx-19mode
data/README.rdoc DELETED
@@ -1,128 +0,0 @@
1
- = amatch - Approximate Matching Extension for Ruby
2
-
3
- == Description
4
-
5
- This is a collection of classes that can be used for Approximate
6
- matching, searching, and comparing of Strings. They implement algorithms
7
- that compute the Levenshtein edit distance, Sellers edit distance, the
8
- Hamming distance, the longest common subsequence length, the longest common
9
- substring length, the pair distance metric, the Jaro-Winkler metric.
10
-
11
- == Download
12
-
13
- The latest version of <b>amatch</b> can be found at
14
-
15
- * http://rubyforge.org/frs/?group_id=390
16
-
17
- Online Documentation should be located at
18
-
19
- * http://amatch.rubyforge.org
20
-
21
- == Installation
22
-
23
- Just type into the command line as root:
24
-
25
- # ruby install.rb
26
-
27
- If you have installed rake (rake.rubyforge.org), you can also type:
28
-
29
- # rake install
30
-
31
- To install this extension as a gem type
32
-
33
- # gem install amatch
34
-
35
- == Examples
36
- require 'amatch'
37
- # => true
38
- include Amatch
39
- # => Object
40
-
41
- m = Sellers.new("pattern")
42
- # => #<Amatch::Sellers:0x40366324>
43
- m.match("pattren")
44
- # => 2.0
45
- m.substitution = m.insertion = 3
46
- # => 3
47
- m.match("pattren")
48
- # => 4.0
49
- m.reset_weights
50
- # => #<Amatch::Sellers:0x40366324>
51
- m.match(["pattren","parent"])
52
- # => [2.0, 4.0]
53
- m.search("abcpattrendef")
54
- # => 2.0
55
-
56
- m = Levenshtein.new("pattern")
57
- # => #<Amatch::Levenshtein:0x4035919c>
58
- m.match("pattren")
59
- # => 2
60
- m.search("abcpattrendef")
61
- # => 2
62
- "pattern language".levenshtein_similar("language of patterns")
63
- # => 0.2
64
-
65
- m = Hamming.new("pattern")
66
- # => #<Amatch::Hamming:0x40350858>
67
- m.match("pattren")
68
- # => 2
69
- "pattern language".hamming_similar("language of patterns")
70
- # => 0.1
71
-
72
- m = PairDistance.new("pattern")
73
- # => #<Amatch::PairDistance:0x40349be8>
74
- m.match("pattr en")
75
- # => 0.545454545454545
76
- m.match("pattr en", nil)
77
- # => 0.461538461538462
78
- m.match("pattr en", /t+/)
79
- # => 0.285714285714286
80
- "pattern language".pair_distance_similar("language of patterns")
81
- # => 0.928571428571429
82
-
83
- m = LongestSubsequence.new("pattern")
84
- # => #<Amatch::LongestSubsequence:0x4033e900>
85
- m.match("pattren")
86
- # => 6
87
- "pattern language".longest_subsequence_similar("language of patterns")
88
- # => 0.4
89
-
90
- m = LongestSubstring.new("pattern")
91
- # => #<Amatch::LongestSubstring:0x403378d0>
92
- m.match("pattren")
93
- # => 4
94
- "pattern language".longest_substring_similar("language of patterns")
95
- # => 0.4
96
-
97
- m = Jaro.new("pattern")
98
- # => #<Amatch::Jaro:0x363b70>
99
- m.match("paTTren")
100
- # => 0.952380952380952
101
- m.ignore_case = false
102
- m.match("paTTren")
103
- # => 0.742857142857143
104
- "pattern language".jaro_similar("language of patterns")
105
- # => 0.672222222222222
106
-
107
- m = JaroWinkler.new("pattern")
108
- # #<Amatch::JaroWinkler:0x3530b8>
109
- m.match("paTTren")
110
- # => 0.971428571712403
111
- m.ignore_case = false
112
- m.match("paTTren")
113
- # => 0.79428571505206
114
- m.scaling_factor = 0.05
115
- m.match("pattren")
116
- # => 0.961904762046678
117
- "pattern language".jarowinkler_similar("language of patterns")
118
- # => 0.672222222222222
119
-
120
- == Author
121
-
122
- Florian Frank mailto:flori@ping.de
123
-
124
- == License
125
-
126
- This is free software; you can redistribute it and/or modify it under
127
- the terms of the GNU General Public License Version 2 as published by
128
- the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.3.0