levenshtein-ffi 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile +8 -0
- data/README.markdown +60 -0
- data/Rakefile +19 -0
- data/VERSION +1 -0
- data/ext/levenshtein/.gitignore +3 -0
- data/ext/levenshtein/extconf.rb +2 -0
- data/ext/levenshtein/levenshtein.c +68 -0
- data/ext/levenshtein/levenshtein.h +1 -0
- data/levenshtein-ffi.gemspec +63 -0
- data/lib/levenshtein.rb +14 -0
- data/spec/levenshtein_spec.rb +24 -0
- data/spec/spec_helper.rb +3 -0
- metadata +121 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.markdown
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
levenshtein-ffi
|
|
2
|
+
===============
|
|
3
|
+
|
|
4
|
+
Converted to FFI by David Balatero for Ruby portability.
|
|
5
|
+
|
|
6
|
+
This gem was originally based on the levenshtein gem.
|
|
7
|
+
|
|
8
|
+
Tested on:
|
|
9
|
+
|
|
10
|
+
* MRI 1.8.6
|
|
11
|
+
* MRI 1.8.7
|
|
12
|
+
* MRI 1.9.1
|
|
13
|
+
* MRI 1.9.2
|
|
14
|
+
* Rubinius 1.1.0
|
|
15
|
+
|
|
16
|
+
Including in Gemfile
|
|
17
|
+
====================
|
|
18
|
+
|
|
19
|
+
gem 'levenshtein-ffi', :require => 'levenshtein'
|
|
20
|
+
|
|
21
|
+
Original README
|
|
22
|
+
===============
|
|
23
|
+
|
|
24
|
+
The levenshtein module implements fast Damerau-Levenshtein edit distance
|
|
25
|
+
computation in O(n) memory and O(n^2) time, using a C wrapper. The module has a
|
|
26
|
+
single function:
|
|
27
|
+
|
|
28
|
+
require 'levenshtein'
|
|
29
|
+
Levenshtein.distance("string1", "string2") == 1 # returns true
|
|
30
|
+
|
|
31
|
+
This function can be used as a drop-in replacement for
|
|
32
|
+
Text::Levenshtein.levenshtein, which is pure Ruby and rather slow. That's it!
|
|
33
|
+
|
|
34
|
+
The code is made available under the following BSD license:
|
|
35
|
+
|
|
36
|
+
Copyright (c) 2009, Schuyler Erle.
|
|
37
|
+
All rights reserved.
|
|
38
|
+
|
|
39
|
+
Redistribution and use in source and binary forms, with or without
|
|
40
|
+
modification, are permitted provided that the following conditions are met:
|
|
41
|
+
|
|
42
|
+
* Redistributions of source code must retain the above copyright notice,
|
|
43
|
+
this list of conditions and the following disclaimer.
|
|
44
|
+
|
|
45
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
46
|
+
this list of conditions and the following disclaimer in the documentation
|
|
47
|
+
and/or other materials provided with the distribution.
|
|
48
|
+
|
|
49
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
50
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
51
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
52
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
53
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
54
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
55
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
56
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
57
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
58
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
59
|
+
|
|
60
|
+
- end -
|
data/Rakefile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Registers Jeweler::Tasks (configured with this gem's metadata) and
# Jeweler::GemcutterTasks. If requiring jeweler raises LoadError, only an
# install hint is printed.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    # Gem identity and descriptive metadata.
    spec.name        = "levenshtein-ffi"
    spec.summary     = "An FFI version of the levenshtein gem."
    spec.description = "Provides a fast, cross-Ruby implementation of the levenshtein distance algorithm."
    spec.email       = "dbalatero@gmail.com"
    spec.homepage    = "http://github.com/dbalatero/levenshtein-ffi"
    spec.authors     = ["David Balatero"]

    # One regular dependency, then the development-only ones.
    spec.add_dependency "ffi"
    spec.add_development_dependency "rspec"
    spec.add_development_dependency "jeweler"
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end
|
|
19
|
+
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1.0.0
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# include <string.h>
|
|
2
|
+
# include <stdlib.h>
|
|
3
|
+
|
|
4
|
+
# ifdef LEV_CASE_INSENSITIVE
# include <ctype.h>
/*
 * tolower() is only defined for values representable as unsigned char (and
 * EOF). A plain char may be negative for bytes >= 0x80, so cast first.
 */
# define eq(x, y) (tolower((unsigned char)(x)) == tolower((unsigned char)(y)))
# else
# define eq(x, y) ((x) == (y))
# endif

# define min(x, y) ((x) < (y) ? (x) : (y))

/*
 * Compute the edit distance between two NUL-terminated byte strings.
 *
 * Insertions and deletions cost 1. A replacement costs 0 when the two
 * characters are equal or are an adjacent transposition of each other, and 1
 * otherwise. Comparison is byte-wise and uses the eq() macro, which is
 * case-insensitive when LEV_CASE_INSENSITIVE is defined.
 *
 * The working memory is a single array of (len2 + 1) unsigned ints, where
 * len2 is the length of word2 after the common prefix has been stripped.
 *
 * Returns the distance. If the working array cannot be allocated, returns
 * (unsigned int) -1, i.e. the largest unsigned int value.
 */
unsigned int levenshtein (const char *word1, const char *word2) {
    size_t len1 = strlen(word1),
           len2 = strlen(word2);
    unsigned int *v;
    unsigned int i, j, current, next = 0, cost;

    /* strip common prefixes */
    while (len1 > 0 && len2 > 0 && eq(word1[0], word2[0]))
        word1++, word2++, len1--, len2--;

    /*
     * handle degenerate cases; these return before anything is allocated,
     * so there is nothing to release on these paths
     */
    if (!len1) return (unsigned int) len2;
    if (!len2) return (unsigned int) len1;

    /* allocate the column vector only once it is known to be needed */
    v = calloc(len2 + 1, sizeof(unsigned int));
    if (!v) return (unsigned int) -1;

    /* initialize the column vector */
    for (j = 0; j < len2 + 1; j++)
        v[j] = j;

    for (i = 0; i < len1; i++) {
        /* set the value of the first row */
        current = i + 1;
        /* for each row in the column, compute the cost */
        for (j = 0; j < len2; j++) {
            /*
             * cost of replacement is 0 if the two chars are the same, or have
             * been transposed with the chars immediately before. otherwise 1.
             */
            cost = !(eq(word1[i], word2[j]) || (i && j &&
                     eq(word1[i-1], word2[j]) && eq(word1[i], word2[j-1])));
            /* find the least cost of insertion, deletion, or replacement */
            next = min(min( v[j+1] + 1,
                            current + 1 ),
                            v[j] + cost );
            /* stash the previous row's cost in the column vector */
            v[j] = current;
            /* make the cost of the next transition current */
            current = next;
        }
        /* keep the final cost at the bottom of the column */
        v[len2] = next;
    }
    free(v);
    return next;
}
|
|
57
|
+
|
|
58
|
+
# ifdef TEST
# include <stdio.h>
# include "levenshtein.h"

/*
 * Command-line driver, compiled only when TEST is defined.
 *
 * Expects two words as argv[1] and argv[2] and prints their distance as
 * "<word1> vs <word2>: <distance>". Returns -1 when fewer than two words are
 * given, and 0 after printing.
 */
int main (int argc, char **argv) {
    unsigned int distance;
    if (argc < 3) return -1;
    distance = levenshtein(argv[1], argv[2]);
    printf("%s vs %s: %u\n", argv[1], argv[2], distance);
    /* explicit status: falling off the end of main is only defined from C99 on */
    return 0;
}
# endif
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/* Prototype for levenshtein(): takes two C strings and returns an unsigned distance. */
unsigned levenshtein(const char *, const char *);
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Generated by jeweler
|
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
|
4
|
+
# -*- encoding: utf-8 -*-
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |s|
|
|
7
|
+
s.name = %q{levenshtein-ffi}
|
|
8
|
+
s.version = "1.0.0"
|
|
9
|
+
|
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
+
s.authors = ["David Balatero"]
|
|
12
|
+
s.date = %q{2010-11-03}
|
|
13
|
+
s.description = %q{Provides a fast, cross-Ruby implementation of the levenshtein distance algorithm.}
|
|
14
|
+
s.email = %q{dbalatero@gmail.com}
|
|
15
|
+
s.extensions = ["ext/levenshtein/extconf.rb"]
|
|
16
|
+
s.extra_rdoc_files = [
|
|
17
|
+
"README.markdown"
|
|
18
|
+
]
|
|
19
|
+
s.files = [
|
|
20
|
+
".gitignore",
|
|
21
|
+
"Gemfile",
|
|
22
|
+
"README.markdown",
|
|
23
|
+
"Rakefile",
|
|
24
|
+
"VERSION",
|
|
25
|
+
"ext/levenshtein/.gitignore",
|
|
26
|
+
"ext/levenshtein/extconf.rb",
|
|
27
|
+
"ext/levenshtein/levenshtein.c",
|
|
28
|
+
"ext/levenshtein/levenshtein.h",
|
|
29
|
+
"levenshtein-ffi.gemspec",
|
|
30
|
+
"lib/levenshtein.rb",
|
|
31
|
+
"spec/levenshtein_spec.rb",
|
|
32
|
+
"spec/spec_helper.rb"
|
|
33
|
+
]
|
|
34
|
+
s.homepage = %q{http://github.com/dbalatero/levenshtein-ffi}
|
|
35
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
|
36
|
+
s.require_paths = ["lib"]
|
|
37
|
+
s.rubygems_version = %q{1.3.7}
|
|
38
|
+
s.summary = %q{An FFI version of the levenshtein gem.}
|
|
39
|
+
s.test_files = [
|
|
40
|
+
"spec/levenshtein_spec.rb",
|
|
41
|
+
"spec/spec_helper.rb"
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
if s.respond_to? :specification_version then
|
|
45
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
|
46
|
+
s.specification_version = 3
|
|
47
|
+
|
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
49
|
+
s.add_runtime_dependency(%q<ffi>, [">= 0"])
|
|
50
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
|
51
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
|
52
|
+
else
|
|
53
|
+
s.add_dependency(%q<ffi>, [">= 0"])
|
|
54
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
|
55
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
56
|
+
end
|
|
57
|
+
else
|
|
58
|
+
s.add_dependency(%q<ffi>, [">= 0"])
|
|
59
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
|
60
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
data/lib/levenshtein.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
require 'ffi'
|
|
2
|
+
|
|
3
|
+
# FFI binding for the native +levenshtein+ function.
#
# Exposes a single module function:
#
#   Levenshtein.distance(string1, string2)  # => Integer
#
# Both arguments are passed to the native function as C strings.
module Levenshtein
  extend FFI::Library

  # Path of the compiled extension, relative to this file, without a suffix.
  library = File.dirname(__FILE__) + "/../ext/levenshtein/levenshtein"
  begin
    ffi_lib(library)
  rescue LoadError
    # Fall back to an explicit ".bundle" suffix when the bare name cannot be
    # loaded (presumably the extension name produced on Mac OS X -- confirm).
    ffi_lib(library + ".bundle")
  end

  # The C function is declared as returning +unsigned int+, so it is bound as
  # :uint rather than :int to match the native signature.
  attach_function :distance, :levenshtein, [:string, :string], :uint
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Levenshtein do
  # Each fixture row is [first word, second word, expected distance].
  before(:each) do
    @fixtures = [
      ["hello", "hello",  0],
      ["hello", "helo",   1],
      ["hello", "jello",  1],
      ["hello", "helol",  1],
      ["hello", "hellol", 1],
      ["hello", "heloll", 2],
      ["hello", "cheese", 4],
      ["hello", "saint",  5],
      ["hello", "",       5],
    ]
  end

  # Every pair is checked in both argument orders.
  it "should calculate correct distances" do
    @fixtures.each do |left, right, expected|
      Levenshtein.distance(left, right).should == expected
      Levenshtein.distance(right, left).should == expected
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: levenshtein-ffi
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 23
|
|
5
|
+
prerelease: false
|
|
6
|
+
segments:
|
|
7
|
+
- 1
|
|
8
|
+
- 0
|
|
9
|
+
- 0
|
|
10
|
+
version: 1.0.0
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- David Balatero
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2010-11-03 00:00:00 -07:00
|
|
19
|
+
default_executable:
|
|
20
|
+
dependencies:
|
|
21
|
+
- !ruby/object:Gem::Dependency
|
|
22
|
+
name: ffi
|
|
23
|
+
prerelease: false
|
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ">="
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
hash: 3
|
|
30
|
+
segments:
|
|
31
|
+
- 0
|
|
32
|
+
version: "0"
|
|
33
|
+
type: :runtime
|
|
34
|
+
version_requirements: *id001
|
|
35
|
+
- !ruby/object:Gem::Dependency
|
|
36
|
+
name: rspec
|
|
37
|
+
prerelease: false
|
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
|
39
|
+
none: false
|
|
40
|
+
requirements:
|
|
41
|
+
- - ">="
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
hash: 3
|
|
44
|
+
segments:
|
|
45
|
+
- 0
|
|
46
|
+
version: "0"
|
|
47
|
+
type: :development
|
|
48
|
+
version_requirements: *id002
|
|
49
|
+
- !ruby/object:Gem::Dependency
|
|
50
|
+
name: jeweler
|
|
51
|
+
prerelease: false
|
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
|
53
|
+
none: false
|
|
54
|
+
requirements:
|
|
55
|
+
- - ">="
|
|
56
|
+
- !ruby/object:Gem::Version
|
|
57
|
+
hash: 3
|
|
58
|
+
segments:
|
|
59
|
+
- 0
|
|
60
|
+
version: "0"
|
|
61
|
+
type: :development
|
|
62
|
+
version_requirements: *id003
|
|
63
|
+
description: Provides a fast, cross-Ruby implementation of the levenshtein distance algorithm.
|
|
64
|
+
email: dbalatero@gmail.com
|
|
65
|
+
executables: []
|
|
66
|
+
|
|
67
|
+
extensions:
|
|
68
|
+
- ext/levenshtein/extconf.rb
|
|
69
|
+
extra_rdoc_files:
|
|
70
|
+
- README.markdown
|
|
71
|
+
files:
|
|
72
|
+
- .gitignore
|
|
73
|
+
- Gemfile
|
|
74
|
+
- README.markdown
|
|
75
|
+
- Rakefile
|
|
76
|
+
- VERSION
|
|
77
|
+
- ext/levenshtein/.gitignore
|
|
78
|
+
- ext/levenshtein/extconf.rb
|
|
79
|
+
- ext/levenshtein/levenshtein.c
|
|
80
|
+
- ext/levenshtein/levenshtein.h
|
|
81
|
+
- levenshtein-ffi.gemspec
|
|
82
|
+
- lib/levenshtein.rb
|
|
83
|
+
- spec/levenshtein_spec.rb
|
|
84
|
+
- spec/spec_helper.rb
|
|
85
|
+
has_rdoc: true
|
|
86
|
+
homepage: http://github.com/dbalatero/levenshtein-ffi
|
|
87
|
+
licenses: []
|
|
88
|
+
|
|
89
|
+
post_install_message:
|
|
90
|
+
rdoc_options:
|
|
91
|
+
- --charset=UTF-8
|
|
92
|
+
require_paths:
|
|
93
|
+
- lib
|
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
95
|
+
none: false
|
|
96
|
+
requirements:
|
|
97
|
+
- - ">="
|
|
98
|
+
- !ruby/object:Gem::Version
|
|
99
|
+
hash: 3
|
|
100
|
+
segments:
|
|
101
|
+
- 0
|
|
102
|
+
version: "0"
|
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
|
+
none: false
|
|
105
|
+
requirements:
|
|
106
|
+
- - ">="
|
|
107
|
+
- !ruby/object:Gem::Version
|
|
108
|
+
hash: 3
|
|
109
|
+
segments:
|
|
110
|
+
- 0
|
|
111
|
+
version: "0"
|
|
112
|
+
requirements: []
|
|
113
|
+
|
|
114
|
+
rubyforge_project:
|
|
115
|
+
rubygems_version: 1.3.7
|
|
116
|
+
signing_key:
|
|
117
|
+
specification_version: 3
|
|
118
|
+
summary: An FFI version of the levenshtein gem.
|
|
119
|
+
test_files:
|
|
120
|
+
- spec/levenshtein_spec.rb
|
|
121
|
+
- spec/spec_helper.rb
|