amatch 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/pair.c CHANGED
@@ -72,6 +72,8 @@ void pair_print(Pair pair)
72
72
 
73
73
  void pair_array_destroy(PairArray *pair_array)
74
74
  {
75
- xfree(pair_array->pairs);
75
+ if (pair_array->pairs) {
76
+ xfree(pair_array->pairs);
77
+ }
76
78
  xfree(pair_array);
77
79
  }
Binary file
@@ -1,6 +1,6 @@
1
1
  module Amatch
2
2
  # Amatch version
3
- VERSION = '0.3.0'
3
+ VERSION = '0.4.1'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
@@ -0,0 +1,93 @@
1
+ require 'test/unit'
2
+ require 'amatch'
3
+
4
+ class TestDamerauLevenshtein < Test::Unit::TestCase
5
+ include Amatch
6
+
7
+ def setup
8
+ @d = 0.000001
9
+ @empty = DamerauLevenshtein.new('')
10
+ @simple = DamerauLevenshtein.new('test')
11
+ @long = DamerauLevenshtein.new('A' * 160)
12
+ end
13
+
14
+ def test_version
15
+ assert_kind_of String, Amatch::VERSION
16
+ end
17
+
18
+ def test_match
19
+ assert_equal 4, @simple.match('')
20
+ assert_equal 0, @simple.match('test')
21
+ assert_equal 1, @simple.match('testa')
22
+ assert_equal 1, @simple.match('atest')
23
+ assert_equal 1, @simple.match('teast')
24
+ assert_equal 1, @simple.match('est')
25
+ assert_equal 1, @simple.match('tes')
26
+ assert_equal 1, @simple.match('tst')
27
+ assert_equal 1, @simple.match('best')
28
+ assert_equal 1, @simple.match('tost')
29
+ assert_equal 1, @simple.match('tesa')
30
+ assert_equal 3, @simple.match('taex')
31
+ assert_equal 6, @simple.match('aaatestbbb')
32
+ assert_equal 1, @simple.match('tset')
33
+ end
34
+
35
+ def test_search
36
+ assert_equal 4, @simple.search('')
37
+ assert_equal 0, @empty.search('')
38
+ assert_equal 0, @empty.search('test')
39
+ assert_equal 0, @simple.search('aaatestbbb')
40
+ assert_equal 3, @simple.search('aaataexbbb')
41
+ assert_equal 4, @simple.search('aaaaaaaaa')
42
+ end
43
+
44
+ def test_array_result
45
+ assert_equal [1, 0], @simple.match(["tets", "test"])
46
+ assert_equal [1, 0], @simple.search(["tetsaaa", "testaaa"])
47
+ assert_raises(TypeError) { @simple.match([:foo, "bar"]) }
48
+ end
49
+
50
+ def test_pattern_setting
51
+ assert_raises(TypeError) { @simple.pattern = :something }
52
+ assert_equal 0, @simple.match('test')
53
+ @simple.pattern = ''
54
+ assert_equal 4, @simple.match('test')
55
+ @simple.pattern = 'test'
56
+ assert_equal 0, @simple.match('test')
57
+ end
58
+
59
+ def test_similar
60
+ assert_in_delta 1, @empty.similar(''), @d
61
+ assert_in_delta 0, @empty.similar('not empty'), @d
62
+ assert_in_delta 0.0, @simple.similar(''), @d
63
+ assert_in_delta 1.0, @simple.similar('test'), @d
64
+ assert_in_delta 0.8, @simple.similar('testa'), @d
65
+ assert_in_delta 0.8, @simple.similar('atest'), @d
66
+ assert_in_delta 0.8, @simple.similar('teast'), @d
67
+ assert_in_delta 0.75, @simple.similar('est'), @d
68
+ assert_in_delta 0.75, @simple.similar('tes'), @d
69
+ assert_in_delta 0.75, @simple.similar('tst'), @d
70
+ assert_in_delta 0.75, @simple.similar('best'), @d
71
+ assert_in_delta 0.75, @simple.similar('tost'), @d
72
+ assert_in_delta 0.75, @simple.similar('tesa'), @d
73
+ assert_in_delta 0.25, @simple.similar('taex'), @d
74
+ assert_in_delta 0.4, @simple.similar('aaatestbbb'), @d
75
+ assert_in_delta 0.75, @simple.pattern.damerau_levenshtein_similar('est'), @d
76
+ end
77
+
78
+ def test_transpositions
79
+ assert_in_delta 1.0, 'atestatest'.damerau_levenshtein_similar('atestatest'), @d
80
+ assert_in_delta 0.9, 'atestatest'.damerau_levenshtein_similar('atetsatest'), @d
81
+ assert_in_delta 0.8, 'atestatest'.damerau_levenshtein_similar('atetsatset'), @d
82
+ end
83
+
84
+ def test_long
85
+ assert_in_delta 1.0, @long.similar(@long.pattern), @d
86
+ end
87
+
88
+ def test_long2
89
+ a = "lost this fantasy, this fantasy, this fantasy, this fantasy, this fantasy, this fantasy\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
90
+ b = "lost\r\n\r\nGood love Neat work\r\n\r\nSuper job Fancy work\r\n\r\nPants job Cool work"
91
+ assert a.damerau_levenshtein_similar(b)
92
+ end
93
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-27 00:00:00.000000000 Z
11
+ date: 2022-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gem_hadar
@@ -16,30 +16,30 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.0.0
19
+ version: 1.12.0
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.0.0
26
+ version: 1.12.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: test-unit
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.3'
33
+ version: '3.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.3'
40
+ version: '3.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: utils
42
+ name: all_images
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -53,50 +53,45 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: rake
56
+ name: tins
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '10'
62
- - - "<"
63
- - !ruby/object:Gem::Version
64
- version: '11.0'
65
- type: :development
61
+ version: '1.0'
62
+ type: :runtime
66
63
  prerelease: false
67
64
  version_requirements: !ruby/object:Gem::Requirement
68
65
  requirements:
69
66
  - - "~>"
70
67
  - !ruby/object:Gem::Version
71
- version: '10'
72
- - - "<"
73
- - !ruby/object:Gem::Version
74
- version: '11.0'
68
+ version: '1.0'
75
69
  - !ruby/object:Gem::Dependency
76
- name: tins
70
+ name: mize
77
71
  requirement: !ruby/object:Gem::Requirement
78
72
  requirements:
79
- - - "~>"
73
+ - - ">="
80
74
  - !ruby/object:Gem::Version
81
- version: '1.0'
75
+ version: '0'
82
76
  type: :runtime
83
77
  prerelease: false
84
78
  version_requirements: !ruby/object:Gem::Requirement
85
79
  requirements:
86
- - - "~>"
80
+ - - ">="
87
81
  - !ruby/object:Gem::Version
88
- version: '1.0'
82
+ version: '0'
89
83
  description: |
90
84
  Amatch is a library for approximate string matching and searching in strings.
91
85
  Several algorithms can be used to do this, and it's also possible to compute a
92
86
  similarity metric number between 0.0 and 1.0 for two given strings.
93
87
  email: flori@ping.de
94
88
  executables:
95
- - agrep.rb
89
+ - agrep
90
+ - dupfind
96
91
  extensions:
97
92
  - ext/extconf.rb
98
93
  extra_rdoc_files:
99
- - README.rdoc
94
+ - README.md
100
95
  - lib/amatch.rb
101
96
  - lib/amatch/polite.rb
102
97
  - lib/amatch/rude.rb
@@ -104,27 +99,27 @@ extra_rdoc_files:
104
99
  - ext/amatch_ext.c
105
100
  - ext/pair.c
106
101
  files:
107
- - ".gitignore"
108
- - ".travis.yml"
109
102
  - CHANGES
110
103
  - COPYING
111
104
  - Gemfile
112
- - README.rdoc
105
+ - README.md
113
106
  - Rakefile
114
- - VERSION
115
107
  - amatch.gemspec
116
- - bin/agrep.rb
108
+ - bin/agrep
109
+ - bin/dupfind
117
110
  - ext/amatch_ext.c
118
111
  - ext/common.h
119
112
  - ext/extconf.rb
120
113
  - ext/pair.c
121
114
  - ext/pair.h
115
+ - images/amatch_ext.png
122
116
  - install.rb
123
117
  - lib/amatch.rb
124
118
  - lib/amatch/.keep
125
119
  - lib/amatch/polite.rb
126
120
  - lib/amatch/rude.rb
127
121
  - lib/amatch/version.rb
122
+ - tests/test_damerau_levenshtein.rb
128
123
  - tests/test_hamming.rb
129
124
  - tests/test_jaro.rb
130
125
  - tests/test_jaro_winkler.rb
@@ -134,14 +129,15 @@ files:
134
129
  - tests/test_pair_distance.rb
135
130
  - tests/test_sellers.rb
136
131
  homepage: http://github.com/flori/amatch
137
- licenses: []
132
+ licenses:
133
+ - Apache-2.0
138
134
  metadata: {}
139
- post_install_message:
135
+ post_install_message:
140
136
  rdoc_options:
141
137
  - "--title"
142
138
  - Amatch - Approximate Matching
143
139
  - "--main"
144
- - README.rdoc
140
+ - README.md
145
141
  require_paths:
146
142
  - lib
147
143
  - ext
@@ -149,19 +145,19 @@ required_ruby_version: !ruby/object:Gem::Requirement
149
145
  requirements:
150
146
  - - ">="
151
147
  - !ruby/object:Gem::Version
152
- version: '0'
148
+ version: '2.4'
153
149
  required_rubygems_version: !ruby/object:Gem::Requirement
154
150
  requirements:
155
151
  - - ">="
156
152
  - !ruby/object:Gem::Version
157
153
  version: '0'
158
154
  requirements: []
159
- rubyforge_project:
160
- rubygems_version: 2.2.2
161
- signing_key:
155
+ rubygems_version: 3.3.13
156
+ signing_key:
162
157
  specification_version: 4
163
158
  summary: Approximate String Matching library
164
159
  test_files:
160
+ - tests/test_damerau_levenshtein.rb
165
161
  - tests/test_hamming.rb
166
162
  - tests/test_jaro.rb
167
163
  - tests/test_jaro_winkler.rb
data/.gitignore DELETED
@@ -1,6 +0,0 @@
1
- .*.sw[pon]
2
- .AppleDouble
3
- .bundle
4
- .rbx
5
- Gemfile.lock
6
- pkg
data/.travis.yml DELETED
@@ -1,10 +0,0 @@
1
- rvm:
2
- - 2.0.0
3
- - 2.1.0
4
- - ruby-head
5
- - rbx-18mode
6
- - rbx-19mode
7
- matrix:
8
- allow_failures:
9
- - rvm: rbx-18mode
10
- - rvm: rbx-19mode
data/README.rdoc DELETED
@@ -1,128 +0,0 @@
1
- = amatch - Approximate Matching Extension for Ruby
2
-
3
- == Description
4
-
5
- This is a collection of classes that can be used for Approximate
6
- matching, searching, and comparing of Strings. They implement algorithms
7
- that compute the Levenshtein edit distance, Sellers edit distance, the
8
- Hamming distance, the longest common subsequence length, the longest common
9
- substring length, the pair distance metric, the Jaro-Winkler metric.
10
-
11
- == Download
12
-
13
- The latest version of <b>amatch</b> can be found at
14
-
15
- * http://rubyforge.org/frs/?group_id=390
16
-
17
- Online Documentation should be located at
18
-
19
- * http://amatch.rubyforge.org
20
-
21
- == Installation
22
-
23
- Just type into the command line as root:
24
-
25
- # ruby install.rb
26
-
27
- If you have installed rake (rake.rubyforge.org), you can also type:
28
-
29
- # rake install
30
-
31
- To install this extension as a gem type
32
-
33
- # gem install amatch
34
-
35
- == Examples
36
- require 'amatch'
37
- # => true
38
- include Amatch
39
- # => Object
40
-
41
- m = Sellers.new("pattern")
42
- # => #<Amatch::Sellers:0x40366324>
43
- m.match("pattren")
44
- # => 2.0
45
- m.substitution = m.insertion = 3
46
- # => 3
47
- m.match("pattren")
48
- # => 4.0
49
- m.reset_weights
50
- # => #<Amatch::Sellers:0x40366324>
51
- m.match(["pattren","parent"])
52
- # => [2.0, 4.0]
53
- m.search("abcpattrendef")
54
- # => 2.0
55
-
56
- m = Levenshtein.new("pattern")
57
- # => #<Amatch::Levenshtein:0x4035919c>
58
- m.match("pattren")
59
- # => 2
60
- m.search("abcpattrendef")
61
- # => 2
62
- "pattern language".levenshtein_similar("language of patterns")
63
- # => 0.2
64
-
65
- m = Hamming.new("pattern")
66
- # => #<Amatch::Hamming:0x40350858>
67
- m.match("pattren")
68
- # => 2
69
- "pattern language".hamming_similar("language of patterns")
70
- # => 0.1
71
-
72
- m = PairDistance.new("pattern")
73
- # => #<Amatch::PairDistance:0x40349be8>
74
- m.match("pattr en")
75
- # => 0.545454545454545
76
- m.match("pattr en", nil)
77
- # => 0.461538461538462
78
- m.match("pattr en", /t+/)
79
- # => 0.285714285714286
80
- "pattern language".pair_distance_similar("language of patterns")
81
- # => 0.928571428571429
82
-
83
- m = LongestSubsequence.new("pattern")
84
- # => #<Amatch::LongestSubsequence:0x4033e900>
85
- m.match("pattren")
86
- # => 6
87
- "pattern language".longest_subsequence_similar("language of patterns")
88
- # => 0.4
89
-
90
- m = LongestSubstring.new("pattern")
91
- # => #<Amatch::LongestSubstring:0x403378d0>
92
- m.match("pattren")
93
- # => 4
94
- "pattern language".longest_substring_similar("language of patterns")
95
- # => 0.4
96
-
97
- m = Jaro.new("pattern")
98
- # => #<Amatch::Jaro:0x363b70>
99
- m.match("paTTren")
100
- # => 0.952380952380952
101
- m.ignore_case = false
102
- m.match("paTTren")
103
- # => 0.742857142857143
104
- "pattern language".jaro_similar("language of patterns")
105
- # => 0.672222222222222
106
-
107
- m = JaroWinkler.new("pattern")
108
- # #<Amatch::JaroWinkler:0x3530b8>
109
- m.match("paTTren")
110
- # => 0.971428571712403
111
- m.ignore_case = false
112
- m.match("paTTren")
113
- # => 0.79428571505206
114
- m.scaling_factor = 0.05
115
- m.match("pattren")
116
- # => 0.961904762046678
117
- "pattern language".jarowinkler_similar("language of patterns")
118
- # => 0.672222222222222
119
-
120
- == Author
121
-
122
- Florian Frank mailto:flori@ping.de
123
-
124
- == License
125
-
126
- This is free software; you can redistribute it and/or modify it under
127
- the terms of the GNU General Public License Version 2 as published by
128
- the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.3.0