damerau-levenshtein 1.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -1
- data/README.md +20 -6
- data/damerau-levenshtein.gemspec +7 -6
- data/ext/damerau_levenshtein/damerau_levenshtein.c +7 -7
- data/lib/damerau-levenshtein.rb +18 -1
- data/lib/damerau-levenshtein/damerau_levenshtein.so +0 -0
- data/lib/damerau-levenshtein/version.rb +1 -1
- metadata +13 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: acbd86c82aab23d17130fda4d7e05aa39d10571d
|
|
4
|
+
data.tar.gz: 8421eaf8a995a2b217ca77a703b0ac328a729202
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3ad4824365414744893442e7c64698f9c0bd0ca4b948b52d53b55f107a89fc5e96971e1bf81ac50a051ed63bf68dcb33493e08af96fbe16aa9d10bd030b0cae6
|
|
7
|
+
data.tar.gz: e1e72f50b29be357115bdf666bb7c7a57c9b88bd1eecf0457d25dba469ebe20cdd65e8b4b58c153d947041ff2dd5f2593743787257bd575d4689416a31065ee3
|
data/CHANGELOG.md
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
damerau-levenshtein CHANGELOG
|
|
2
2
|
=============================
|
|
3
3
|
|
|
4
|
-
1.2.
|
|
4
|
+
1.2.0 -- add edit distance for array of integers (by @azhi)
|
|
5
|
+
|
|
6
|
+
1.1.3 -- add ruby 2.3.1 to travis tests by request from @greysteil
|
|
5
7
|
|
|
6
8
|
1.1.2 -- remove unnecessary production dependencies (by @ixti)
|
|
7
9
|
|
data/README.md
CHANGED
|
@@ -13,7 +13,7 @@ This gem implements pure Levenshtein algorithm, Damerau modification of it
|
|
|
13
13
|
(where 2 character transposition counts as 1 edit distance). It also includes
|
|
14
14
|
Boehmer & Rees 2008 modification of Damerau algorithm, where transposition
|
|
15
15
|
of blocks larger than 1 character is taken into account as well
|
|
16
|
-
(
|
|
16
|
+
[(Rees 2014)][rees2014].
|
|
17
17
|
|
|
18
18
|
```ruby
|
|
19
19
|
require "damerau-levenshtein"
|
|
@@ -82,10 +82,10 @@ DamerauLevenshtein.distance(string1, string2, block_size, max_distance)
|
|
|
82
82
|
|
|
83
83
|
DamerauLevenshtein.distance takes 4 arguments:
|
|
84
84
|
|
|
85
|
-
* string1
|
|
86
|
-
* string2
|
|
87
|
-
* block_size (default is 1)
|
|
88
|
-
* max_distance (default is 10)
|
|
85
|
+
* `string1`
|
|
86
|
+
* `string2`
|
|
87
|
+
* `block_size` (default is 1)
|
|
88
|
+
* `max_distance` (default is 10)
|
|
89
89
|
|
|
90
90
|
`block_size` determines maximum number of characters in a transposition block:
|
|
91
91
|
|
|
@@ -117,6 +117,17 @@ DamerauLevenshtein.distance("abcdefg", "1234567", 0, 3)
|
|
|
117
117
|
# output: 4 -- it gave up when edit distance exceeded 3
|
|
118
118
|
```
|
|
119
119
|
|
|
120
|
+
`DamerauLevenshtein.string_distance` is an alias of
|
|
121
|
+
`DamerauLevenshtein.distance`.
|
|
122
|
+
|
|
123
|
+
`DamerauLevenshtein.array_distance` has the same parameters as
|
|
124
|
+
`DamerauLevenshtein.distance`, but operates on arrays of Integers.
|
|
125
|
+
|
|
126
|
+
```ruby
|
|
127
|
+
DamerauLevenshtein.array_distance([1,2,4], [1,2,3])
|
|
128
|
+
# output: 1
|
|
129
|
+
```
|
|
130
|
+
|
|
120
131
|
Contributing to damerau-levenshtein
|
|
121
132
|
-----------------------------------
|
|
122
133
|
|
|
@@ -146,7 +157,8 @@ Authors
|
|
|
146
157
|
Contributors
|
|
147
158
|
------------
|
|
148
159
|
|
|
149
|
-
[lazylester][lazylester], [Ran Xie][skarlit], [Alexey Zapparov][ixti]
|
|
160
|
+
[lazylester][lazylester], [Ran Xie][skarlit], [Alexey Zapparov][ixti],
|
|
161
|
+
[azhi][azhi]
|
|
150
162
|
|
|
151
163
|
Copyright
|
|
152
164
|
---------
|
|
@@ -168,3 +180,5 @@ further details.
|
|
|
168
180
|
[lazylester]: https://github.com/lazylester
|
|
169
181
|
[skarlit]: https://github.com/Skarlit
|
|
170
182
|
[ixti]: https://github.com/ixti
|
|
183
|
+
[azhi]: https://github.com/azhi
|
|
184
|
+
[rees2014]: https://dx.doi.org/10.1371/journal.pone.0107510
|
data/damerau-levenshtein.gemspec
CHANGED
|
@@ -15,21 +15,22 @@ Gem::Specification.new do |s|
|
|
|
15
15
|
"Damerau modification (where 2 character " \
|
|
16
16
|
"transposition counts as 1 edit distance). It also " \
|
|
17
17
|
"includes Boehmer & Rees 2008 modification, " \
|
|
18
|
-
"to handle transposition in
|
|
18
|
+
"to handle transposition in blocks with more than " \
|
|
19
19
|
"2 characters (Boehmer & Rees 2008)."
|
|
20
20
|
s.files = `git ls-files -z`.split("\x0").
|
|
21
21
|
reject { |f| f.match(%r{^(test|spec|features)/}) }
|
|
22
22
|
s.extensions = ["ext/damerau_levenshtein/extconf.rb"]
|
|
23
23
|
s.require_paths = ["lib", "lib/damerau-levenshtein"]
|
|
24
24
|
|
|
25
|
-
s.add_development_dependency "rspec", "~> 3.
|
|
25
|
+
s.add_development_dependency "rspec", "~> 3.5"
|
|
26
|
+
# activesupport >= 5.0 does not support Ruby < 2.2
|
|
26
27
|
s.add_development_dependency "activesupport", "~> 4.2"
|
|
27
|
-
s.add_development_dependency "cucumber", "~> 2.
|
|
28
|
+
s.add_development_dependency "cucumber", "~> 2.4"
|
|
28
29
|
s.add_development_dependency "ruby-prof", "~> 0.15"
|
|
29
30
|
s.add_development_dependency "shoulda", "~> 3.5"
|
|
30
|
-
s.add_development_dependency "rubocop", "~> 0.
|
|
31
|
+
s.add_development_dependency "rubocop", "~> 0.41"
|
|
31
32
|
s.add_development_dependency "coveralls", "~> 0.8"
|
|
32
33
|
s.add_development_dependency "bundler", "~> 1.11"
|
|
33
|
-
s.add_development_dependency "rake", "~> 11.
|
|
34
|
-
s.add_development_dependency "rake-compiler", "~> 0
|
|
34
|
+
s.add_development_dependency "rake", "~> 11.2"
|
|
35
|
+
s.add_development_dependency "rake-compiler", "~> 1.0"
|
|
35
36
|
end
|
|
@@ -4,14 +4,14 @@ VALUE DamerauLevenshteinBinding = Qnil;
|
|
|
4
4
|
|
|
5
5
|
void Init_damerau_levenshtein();
|
|
6
6
|
|
|
7
|
-
VALUE
|
|
7
|
+
VALUE method_internal_distance(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VALUE _max_distance);
|
|
8
8
|
|
|
9
9
|
void Init_damerau_levenshtein() {
|
|
10
10
|
DamerauLevenshteinBinding = rb_define_module("DamerauLevenshteinBinding");
|
|
11
|
-
rb_define_method(DamerauLevenshteinBinding, "
|
|
11
|
+
rb_define_method(DamerauLevenshteinBinding, "internal_distance", method_internal_distance, 4);
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
-
VALUE
|
|
14
|
+
VALUE method_internal_distance(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VALUE _max_distance){
|
|
15
15
|
VALUE *sv = RARRAY_PTR(_s);
|
|
16
16
|
VALUE *tv = RARRAY_PTR(_t);
|
|
17
17
|
int i, i1, j, j1, k, half_tl, cost, *d, distance, del, ins, subs, transp, block;
|
|
@@ -24,8 +24,8 @@ VALUE method_distance_utf(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VAL
|
|
|
24
24
|
int max_distance = NUM2INT(_max_distance);
|
|
25
25
|
int sl = (int) RARRAY_LEN(_s);
|
|
26
26
|
int tl = (int) RARRAY_LEN(_t);
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
long long s[sl];
|
|
28
|
+
long long t[tl];
|
|
29
29
|
|
|
30
30
|
if (block_size == 0) {
|
|
31
31
|
pure_levenshtein = 1;
|
|
@@ -39,8 +39,8 @@ VALUE method_distance_utf(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VAL
|
|
|
39
39
|
if (sl == 1 && tl == 1 && sv[0] != tv[0]) return INT2NUM(1);
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
for (i=0; i < sl; i++) s[i] =
|
|
43
|
-
for (i=0; i < tl; i++) t[i] =
|
|
42
|
+
for (i=0; i < sl; i++) s[i] = NUM2LL(sv[i]);
|
|
43
|
+
for (i=0; i < tl; i++) t[i] = NUM2LL(tv[i]);
|
|
44
44
|
|
|
45
45
|
sl++;
|
|
46
46
|
tl++;
|
data/lib/damerau-levenshtein.rb
CHANGED
|
@@ -12,6 +12,23 @@ module DamerauLevenshtein
|
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
def self.distance(str1, str2, block_size = 1, max_distance = 10)
|
|
15
|
-
|
|
15
|
+
internal_distance(
|
|
16
|
+
str1.unpack("U*"), str2.unpack("U*"),
|
|
17
|
+
block_size, max_distance
|
|
18
|
+
)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def self.string_distance(*args)
|
|
22
|
+
distance(*args)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def self.array_distance(array1, array2, block_size = 1, max_distance = 10)
|
|
26
|
+
internal_distance(array1, array2, block_size, max_distance)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# keep backward compatibility - internal_distance was called distance_utf
|
|
30
|
+
# before
|
|
31
|
+
def self.distance_utf(*args)
|
|
32
|
+
internal_distance(*args)
|
|
16
33
|
end
|
|
17
34
|
end
|
|
Binary file
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: damerau-levenshtein
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dmitry Mozzherin
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-
|
|
11
|
+
date: 2016-09-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|
|
@@ -16,14 +16,14 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '3.
|
|
19
|
+
version: '3.5'
|
|
20
20
|
type: :development
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '3.
|
|
26
|
+
version: '3.5'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: activesupport
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -44,14 +44,14 @@ dependencies:
|
|
|
44
44
|
requirements:
|
|
45
45
|
- - "~>"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '2.
|
|
47
|
+
version: '2.4'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '2.
|
|
54
|
+
version: '2.4'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: ruby-prof
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -86,14 +86,14 @@ dependencies:
|
|
|
86
86
|
requirements:
|
|
87
87
|
- - "~>"
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
|
-
version: '0.
|
|
89
|
+
version: '0.41'
|
|
90
90
|
type: :development
|
|
91
91
|
prerelease: false
|
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
|
94
94
|
- - "~>"
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
|
-
version: '0.
|
|
96
|
+
version: '0.41'
|
|
97
97
|
- !ruby/object:Gem::Dependency
|
|
98
98
|
name: coveralls
|
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -128,31 +128,31 @@ dependencies:
|
|
|
128
128
|
requirements:
|
|
129
129
|
- - "~>"
|
|
130
130
|
- !ruby/object:Gem::Version
|
|
131
|
-
version: '11.
|
|
131
|
+
version: '11.2'
|
|
132
132
|
type: :development
|
|
133
133
|
prerelease: false
|
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
|
135
135
|
requirements:
|
|
136
136
|
- - "~>"
|
|
137
137
|
- !ruby/object:Gem::Version
|
|
138
|
-
version: '11.
|
|
138
|
+
version: '11.2'
|
|
139
139
|
- !ruby/object:Gem::Dependency
|
|
140
140
|
name: rake-compiler
|
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
|
142
142
|
requirements:
|
|
143
143
|
- - "~>"
|
|
144
144
|
- !ruby/object:Gem::Version
|
|
145
|
-
version: '0
|
|
145
|
+
version: '1.0'
|
|
146
146
|
type: :development
|
|
147
147
|
prerelease: false
|
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
|
149
149
|
requirements:
|
|
150
150
|
- - "~>"
|
|
151
151
|
- !ruby/object:Gem::Version
|
|
152
|
-
version: '0
|
|
152
|
+
version: '1.0'
|
|
153
153
|
description: This gem implements pure Levenshtein algorithm, Damerau modification
|
|
154
154
|
(where 2 character transposition counts as 1 edit distance). It also includes Boehmer
|
|
155
|
-
& Rees 2008 modification, to handle transposition in
|
|
155
|
+
& Rees 2008 modification, to handle transposition in blocks with more than 2 characters
|
|
156
156
|
(Boehmer & Rees 2008).
|
|
157
157
|
email: dmozzherin@gmail.com
|
|
158
158
|
executables: []
|