vladlev 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +3 -0
- data/Gemfile +3 -0
- data/README.md +43 -0
- data/Rakefile +22 -0
- data/ext/.DS_Store +0 -0
- data/ext/levenshtein/LevenshteinDistance.java +66 -0
- data/ext/levenshtein/extconf.rb +5 -0
- data/ext/levenshtein/levenshtein.c +118 -0
- data/lib/levenshtein.bundle +0 -0
- data/lib/levenshtein.jar +0 -0
- data/lib/vladlev.rb +86 -0
- data/lib/vladlev/levenshtein.rb +49 -0
- data/lib/vladlev/version.rb +3 -0
- data/spec/lib/levenshtein_spec.rb +72 -0
- data/spec/spec_helper.rb +11 -0
- data/vladlev.gemspec +33 -0
- metadata +174 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a3d833003de88f3525f4779667d4ecb83da15562
|
4
|
+
data.tar.gz: 061e99aa0307164452a29d463f2ceddaf8610273
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b8622bb2248712886afcaf79fbf067deea3f01abc4f947f81c68c9d862a55ad1d22d8d3bfea1b9e5f3823e136cb2488bbf3fbd87bc185a8f9ef2fc61e72758ad
|
7
|
+
data.tar.gz: 40fbc3bb0e328b9bfc6ddaeb247cb7ec0505aae90b995dc273f40358dfced4dcf4cbd3935e2d66265d56c1f1f4d07cb2451c8ecb1221a2859517266ed15866ba
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
Vladlev
|
2
|
+
===========
|
3
|
+
|
4
|
+
An implementation of the levenshtein distance algorithm for ruby using FFI
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your gemfile and then run `bundle` to install
|
9
|
+
|
10
|
+
gem 'vladlev'
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
Vladlev will calculate the Levenshtein distance between two strings. The Levenshtein distance is the number of transforms necessary to make one string identical to the other. A transform is defined as an addition, deletion, or alteration of a single character in a string.
|
15
|
+
|
16
|
+
In order to calculate the distance
|
17
|
+
|
18
|
+
Vladlev.distance("string1", "string2")
|
19
|
+
>> 1
|
20
|
+
|
21
|
+
Vladlev also includes a 3 parameter version of the distance method. The third parameter is "maximum distance", which tells Vladlev to stop calculation once the distance becomes greater than this parameter.
|
22
|
+
|
23
|
+
In order to use this optimization
|
24
|
+
|
25
|
+
Vladlev.distance("string1234567890", "string1", 1)
|
26
|
+
>> 16
|
27
|
+
|
28
|
+
Vladlev.distance("string1234567890", "string1", 999)
|
29
|
+
>> 9
|
30
|
+
|
31
|
+
When given a pair of strings such that the distance between the two strings is greater than the "maximum distance" parameter, Vladlev will return the length of the longer string rather than spend the effort of calculating the distance when you know that you are not interested in the result.
|
32
|
+
|
33
|
+
## Development
|
34
|
+
|
35
|
+
Vladlev uses rake-compiler for a build tool
|
36
|
+
|
37
|
+
bundle exec rake compile
|
38
|
+
|
39
|
+
bundle exec rake clean
|
40
|
+
|
41
|
+
For an aggressive clean of compiled files, you can do this
|
42
|
+
|
43
|
+
bundle exec rake clobber
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rubygems/package_task'
|
3
|
+
require 'bundler/gem_tasks'
|
4
|
+
require 'rake/clean'
|
5
|
+
|
6
|
+
require 'rspec/core/rake_task'
|
7
|
+
RSpec::Core::RakeTask.new(:spec)
|
8
|
+
|
9
|
+
spec = Gem::Specification.load('vladlev.gemspec')
|
10
|
+
|
11
|
+
Gem::PackageTask.new(spec) do |pkg|
|
12
|
+
end
|
13
|
+
|
14
|
+
if RUBY_PLATFORM =~ /java/
|
15
|
+
require 'rake/javaextensiontask'
|
16
|
+
Rake::JavaExtensionTask.new('levenshtein', spec)
|
17
|
+
else
|
18
|
+
require 'rake/extensiontask'
|
19
|
+
Rake::ExtensionTask.new('levenshtein', spec)
|
20
|
+
end
|
21
|
+
|
22
|
+
task :default => :spec
|
data/ext/.DS_Store
ADDED
Binary file
|
@@ -0,0 +1,66 @@
|
|
1
|
+
/**
 * Levenshtein edit distance between two strings, with an optional cut-off.
 *
 * When the distance is known to exceed the cut-off, the length of the longer
 * string is returned instead of the exact distance.
 */
public class LevenshteinDistance {
  /** Cut-off applied by the overloads that do not take an explicit maximum. */
  private static final long DEFAULT_MAXIMUM_DISTANCE = 9999;

  /** Smallest of three ints. */
  private static int minimum(int a, int b, int c) {
    int smallest = a;
    if (b < smallest) {
      smallest = b;
    }
    if (c < smallest) {
      smallest = c;
    }
    return smallest;
  }

  /**
   * Edit distance between str1 and str2, abandoning the calculation as soon
   * as every cell of the current row is above maximumDistance.
   *
   * @return 0 for equal strings; the length of the longer string when either
   *         string is empty or the cut-off is exceeded; otherwise the distance
   */
  public static int distance(String str1, String str2, long maximumDistance) {
    final boolean firstIsLonger = str1.length() > str2.length();
    final String longer = firstIsLonger ? str1 : str2;
    final String shorter = firstIsLonger ? str2 : str1;
    final int longLength = longer.length();
    final int shortLength = shorter.length();

    if (shorter.equals(longer)) {
      return 0;
    }
    if (longLength - shortLength > maximumDistance) {
      return longLength;
    }
    if (shortLength == 0 || longLength == 0) {
      return longLength;
    }

    final char[] shortChars = shorter.toCharArray();
    final char[] longChars = longer.toCharArray();

    // Two rolling rows: "previous" holds the finished row, "current" is being filled.
    int[] current = new int[shortLength + 1];
    int[] previous = new int[shortLength + 1];
    for (int column = 0; column <= shortLength; column++) {
      previous[column] = column;
    }

    for (int row = 1; row <= longLength; row++) {
      current[0] = previous[0] + 1;
      int smallestInRow = current[0];

      for (int column = 1; column <= shortLength; column++) {
        int substitution = (shortChars[column - 1] == longChars[row - 1]) ? 0 : 1;
        current[column] = minimum(
            previous[column] + 1,
            current[column - 1] + 1,
            previous[column - 1] + substitution);
        if (current[column] < smallestInRow) {
          smallestInRow = current[column];
        }
      }

      // Every cell in this row is already over the limit: give up early.
      if (smallestInRow > maximumDistance) {
        return longLength;
      }

      int[] swap = current;
      current = previous;
      previous = swap;
    }

    return previous[shortLength];
  }

  /** Edit distance using the default cut-off. */
  public static int distance(String str1, String str2) {
    return distance(str1, str2, DEFAULT_MAXIMUM_DISTANCE);
  }

  /** Normalized distance using the default cut-off. */
  public static float normalized_distance(String str1, String str2) {
    return normalized_distance(str1, str2, DEFAULT_MAXIMUM_DISTANCE);
  }

  /**
   * Distance divided by the length of the longer string; 0 when both strings
   * are empty.
   */
  public static float normalized_distance(String str1, String str2, long maximumDistance) {
    int longestLength = Math.max(str1.length(), str2.length());
    if (longestLength == 0) {
      return 0;
    }
    return distance(str1, str2, maximumDistance) / (float) longestLength;
  }
}
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif

#define ALLOC malloc
#define FREE free

/* Used to swap calc and work grids while looping */
#define LV_SWAP(t, a, b) do { (t) = (a); (a) = (b); (b) = (t); } while (0)
/* Smallest of three values; arguments are evaluated more than once, so pass
 * side-effect-free expressions only. */
#define LV_MIN(a, b, c) (((a) <= (b)) ? (((a) <= (c)) ? (a) : (c)) : (((b) <= (c)) ? (b) : (c)))

/* Inputs for one distance calculation.
 * levenshtein_extern fills this so that `a` is the shorter string and `b`
 * the longer one; the lengths are the string lengths without the NUL. */
typedef struct{
  char* a;
  char* b;
  int a_length;
  int b_length;
  int maximum_allowable_distance;
} LevenConstraints;

/* Compute the Levenshtein distance described by `leven`.
 *
 * Returns 0 for equal strings, b_length when either string is empty, when the
 * length difference already exceeds maximum_allowable_distance, or when every
 * cell of a row exceeds it, and 9999 when memory cannot be allocated.
 * The structure is not modified. */
int levenshtein_intern(LevenConstraints* leven)
{
  int x, i, j, cost, row_min, distance;
  int *grid_odd, *grid_even, *work_grid, *calc_grid, *tmp;
  int broke_max = FALSE;
  int a_cells, b_cells;

  /* Check the minimum distance to keep from calculating anything when it isn't really needed
   * Efficient use of the levenshtein call includes a maximum_allowable_distance (or distance threshold) */
  if ((leven->b_length - leven->a_length) > leven->maximum_allowable_distance || (leven->a_length == 0 || leven->b_length == 0)) {
    return (int)leven->b_length;
  }

  /* no reason to run levenshtein when equal */
  if (leven->a_length == leven->b_length && strcmp(leven->a, leven->b) == 0) {
    return 0;
  }

  /* Each row has one cell per character plus the leading "empty prefix" cell.
   * Kept in locals so the caller's structure is left untouched. */
  a_cells = leven->a_length + 1;
  b_cells = leven->b_length + 1;

  grid_even = ALLOC(sizeof(int) * (size_t)a_cells);
  grid_odd = ALLOC(sizeof(int) * (size_t)a_cells);

  if (grid_even == NULL || grid_odd == NULL) {
    /* free(NULL) is a no-op, so this releases whichever allocation succeeded */
    FREE(grid_even);
    FREE(grid_odd);
    return (int)9999; /* error occured - cannot allocate memory */
  }

  work_grid = grid_odd;
  calc_grid = grid_even;

  for (x = 0; x < a_cells; x++) {
    grid_even[x] = x;
  }

  for (i = 1; i < b_cells; i++) {
    row_min = work_grid[0] = calc_grid[0] + 1;

    for (j = 1; j < a_cells; j++) {
      cost = (leven->a[j-1] == leven->b[i-1]) ? 0 : 1;
      work_grid[j] = LV_MIN(calc_grid[j] + 1, work_grid[j-1] + 1, calc_grid[j-1] + cost);
      row_min = (work_grid[j] < row_min) ? work_grid[j] : row_min;
    }

    /* Row minimums never decrease, so the final distance must exceed the limit too */
    if (row_min > leven->maximum_allowable_distance) {
      broke_max = TRUE;
      break;
    }

    LV_SWAP(tmp, work_grid, calc_grid);
  }

  distance = broke_max ? leven->b_length : calc_grid[a_cells - 1];

  FREE(grid_odd);
  FREE(grid_even);

  return distance;
}

/* Public entry point: distance between `a` and `b`.
 * A NULL string is treated as empty. A negative max_distance means
 * "no useful limit" and is replaced by the length of the longer string. */
int levenshtein_extern(char* a, char* b, int max_distance)
{
  static char empty[] = "";
  char* safe_a = (a == NULL) ? empty : a;
  char* safe_b = (b == NULL) ? empty : b;
  int a_len = (int)strlen(safe_a);
  int b_len = (int)strlen(safe_b);
  LevenConstraints leven;

  /* levenshtein_intern expects `a` to be the shorter string */
  if (a_len > b_len) {
    leven.a = safe_b;
    leven.b = safe_a;
    leven.a_length = b_len;
    leven.b_length = a_len;
  } else {
    leven.a = safe_a;
    leven.b = safe_b;
    leven.a_length = a_len;
    leven.b_length = b_len;
  }

  if (max_distance < 0) {
    max_distance = leven.b_length;
  }

  leven.maximum_allowable_distance = max_distance;

  return levenshtein_intern(&leven);
}

/* Distance divided by the length of the longer string; 0 when both strings
 * are empty or NULL. */
float normalized_levenshtein_extern(char* a, char* b, int max_distance)
{
  int a_len = (a == NULL) ? 0 : (int)strlen(a);
  int b_len = (b == NULL) ? 0 : (int)strlen(b);

  int max_string_length = (a_len > b_len) ? a_len : b_len;
  if (max_string_length == 0) {
    return 0;
  }
  return (float)levenshtein_extern(a, b, max_distance) / max_string_length;
}
|
Binary file
|
data/lib/levenshtein.jar
ADDED
Binary file
|
data/lib/vladlev.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
# Levenshtein distance for Ruby.
#
# At load time one of three back ends is selected, in this order: the Java
# class from levenshtein.jar (on JRuby), the C library loaded through FFI, or
# the pure-Ruby implementation in vladlev/levenshtein.
module Vladlev
  # @param filename [String] file name relative to this file's directory
  # @return [Boolean] whether that file exists
  def self.relative_exists?(filename)
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    File.exist?(File.join(File.dirname(__FILE__), filename))
  end

  JRUBY_NATIVE = RUBY_PLATFORM =~ /java/ && relative_exists?('levenshtein.jar')
  C_EXT_NATIVE = !JRUBY_NATIVE &&
    (relative_exists?('levenshtein.bundle') || relative_exists?('levenshtein.so'))

  if JRUBY_NATIVE
    require 'java'
    require File.join(File.dirname(__FILE__), 'levenshtein.jar')

    # Calculate the levenshtein distance between two strings
    #
    # @param str1 [String] first string to compare
    # @param str2 [String] second string to compare
    # @param max [Integer] maximum distance of interest
    # @return [Integer] the levenshtein distance between the strings
    def self._internal_distance(str1, str2, max)
      Java::LevenshteinDistance.distance(str1, str2, max)
    end

    # @return [Float] distance divided by the length of the longer string
    def self._normalized_distance(str1, str2, max)
      Java::LevenshteinDistance.normalized_distance(str1, str2, max)
    end
  elsif C_EXT_NATIVE
    require 'ffi'
    extend ::FFI::Library

    native_file_name =
      relative_exists?('levenshtein.bundle') ? 'levenshtein.bundle' : 'levenshtein.so'

    ffi_lib File.join(File.dirname(__FILE__), native_file_name)
    attach_function :levenshtein_extern, [:pointer, :pointer, :int32], :int32
    attach_function :normalized_levenshtein_extern, [:pointer, :pointer, :int32], :float

    # Calculate the levenshtein distance between two strings
    #
    # @param str1 [String] first string to compare
    # @param str2 [String] second string to compare
    # @param max [Integer] maximum distance of interest
    # @return [Integer] the levenshtein distance between the strings
    def self._internal_distance(str1, str2, max)
      levenshtein_extern(str1, str2, max)
    end

    # @return [Float] distance divided by the length of the longer string
    def self._normalized_distance(str1, str2, max)
      normalized_levenshtein_extern(str1, str2, max)
    end
  else
    require 'vladlev/levenshtein'
    warn <<-PURE_RUBY
Could not load C extension or Java Extension for Vladlev
Will utilize pure ruby version which is significantly
slower for many comparisons.
    PURE_RUBY

    # Calculate the levenshtein distance between two strings
    #
    # @param str1 [String] first string to compare
    # @param str2 [String] second string to compare
    # @param max [Integer] maximum distance of interest
    # @return [Integer] the levenshtein distance between the strings
    def self._internal_distance(str1, str2, max)
      ::Vladlev::Levenshtein.distance(str1, str2, max)
    end

    # Pure-Ruby counterpart of the native normalized distance, so that
    # get_normalized_distance works on every back end.
    #
    # @return [Float] distance divided by the length of the longer string
    def self._normalized_distance(str1, str2, max)
      longest_length = [str1.size, str2.size].max
      return 0.0 if longest_length.zero?

      _internal_distance(str1, str2, max) / longest_length.to_f
    end
  end

  # Levenshtein distance between two strings; nil is treated as empty.
  #
  # @param str1 [String, nil] first string to compare
  # @param str2 [String, nil] second string to compare
  # @param max [Integer] maximum distance of interest, passed to the back end
  # @return [Integer] the distance as reported by the selected back end
  def self.distance(str1, str2, max = 9999)
    return 0 if str1.nil? && str2.nil?
    return str2.size if str1.nil?
    return str1.size if str2.nil?

    _internal_distance(str1, str2, max)
  end

  # Distance divided by the length of the longer string; nil is treated as an
  # empty string so the result is always a ratio rather than a raw length.
  #
  # @param str1 [String, nil] first string to compare
  # @param str2 [String, nil] second string to compare
  # @param max [Integer] maximum distance of interest, passed to the back end
  # @return [Numeric] 0 when both are nil, otherwise the normalized distance
  def self.get_normalized_distance(str1, str2, max = 9999)
    return 0 if str1.nil? && str2.nil?

    _normalized_distance(str1 || '', str2 || '', max)
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Vladlev
  # Pure-Ruby Levenshtein distance, following the same rules as the C and
  # Java implementations shipped with the gem.
  class Levenshtein
    # Calculate the levenshtein distance between two strings.
    #
    # The calculation stops as soon as the distance is known to exceed
    # +maximum_allowable_distance+; in that case the length of the longer
    # string is returned instead of the exact distance.
    #
    # @param str1 [String] first string to compare
    # @param str2 [String] second string to compare
    # @param maximum_allowable_distance [Integer] largest distance of interest
    # @return [Integer] 0 for equal strings, the longer string's length when
    #   the limit is exceeded or one string is empty, otherwise the distance
    def self.distance(str1, str2, maximum_allowable_distance = 9999)
      longest_string, shortest_string = str1.size > str2.size ? [str1, str2] : [str2, str1]

      return 0 if longest_string == shortest_string
      # At least (length difference) insertions are needed, so bail out early.
      return longest_string.size if longest_string.size - shortest_string.size > maximum_allowable_distance
      return longest_string.size if shortest_string.empty?

      short_chars = shortest_string.chars
      long_chars = longest_string.chars
      columns = short_chars.size

      # Two rolling rows with one cell per character plus the leading
      # "empty prefix" cell.
      calculation_grid = (0..columns).to_a
      working_grid = Array.new(columns + 1, 0)

      (1..long_chars.size).each do |i|
        row_minimum = working_grid[0] = calculation_grid[0] + 1

        (1..columns).each do |j|
          cost = short_chars[j - 1] == long_chars[i - 1] ? 0 : 1
          working_grid[j] = [calculation_grid[j] + 1, working_grid[j - 1] + 1, calculation_grid[j - 1] + cost].min
          row_minimum = working_grid[j] if working_grid[j] < row_minimum
        end

        # Row minimums never decrease, so the final distance is over the limit too.
        return longest_string.size if row_minimum > maximum_allowable_distance

        working_grid, calculation_grid = calculation_grid, working_grid
      end

      calculation_grid[columns]
    end

    # Distance divided by the length of the longer string.
    #
    # @param str1 [String] first string to compare
    # @param str2 [String] second string to compare
    # @param maximum_allowable_distance [Integer] largest distance of interest
    # @return [Float] 0.0 when both strings are empty, otherwise a value in 0.0..1.0
    def self.normalized_distance(str1, str2, maximum_allowable_distance = 9999)
      longest_string_length = [str1.size, str2.size].max
      return 0.0 if longest_string_length.zero?

      distance(str1, str2, maximum_allowable_distance) / longest_string_length.to_f
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Vladlev do
|
4
|
+
specify{ Vladlev.distance("hello", "hello").should equal(0) }
|
5
|
+
specify{ Vladlev.distance("", "").should equal(0) }
|
6
|
+
specify{ Vladlev.distance("hello", "jello").should equal(1) }
|
7
|
+
specify{ Vladlev.distance("hella", "hello").should equal(1) }
|
8
|
+
specify{ Vladlev.distance("hello", "jell").should equal(2) }
|
9
|
+
specify{ Vladlev.distance("lo", "jello").should equal(3) }
|
10
|
+
specify{ Vladlev.distance("jello", "lo").should equal(3) }
|
11
|
+
specify{ Vladlev.distance("", "jello").should equal("jello".length) }
|
12
|
+
specify{ Vladlev.distance("jello", "").should equal("jello".length) }
|
13
|
+
specify{ Vladlev.distance("hello"*2, "jello"*2).should equal(2) }
|
14
|
+
specify{ Vladlev.distance("hello"*2, "jelo"*2).should equal(4) }
|
15
|
+
specify{ Vladlev.distance("hello"*2, "jell"*2).should equal(4) }
|
16
|
+
specify{ Vladlev.distance("hello"*4, "jello"*4).should equal(4) }
|
17
|
+
specify{ Vladlev.distance("hello"*8, "jello"*8).should equal(8) }
|
18
|
+
specify{ Vladlev.distance("hello"*16, "jello"*16).should equal(16) }
|
19
|
+
specify{ Vladlev.distance("hello"*32, "jello"*32).should equal(32) }
|
20
|
+
specify{ Vladlev.distance("hello"*64, "jello"*64).should equal(64) }
|
21
|
+
specify{ Vladlev.distance("hello"*128, "jello"*128).should equal(128) }
|
22
|
+
specify{ Vladlev.distance("hello"*256, "jello"*256).should equal(256) }
|
23
|
+
specify{ Vladlev.distance("hello"*512, "jello"*512).should equal(512) }
|
24
|
+
|
25
|
+
describe "threshold" do
|
26
|
+
specify{ Vladlev.distance("hello"*100, "jello"*100, 10).should equal(500) }
|
27
|
+
specify{ Vladlev.distance("hello"*100, "jello"*100, 99).should equal(500) }
|
28
|
+
specify{ Vladlev.distance("hello"*100, "jello"*100, 100).should equal(100) }
|
29
|
+
specify{ Vladlev.distance("hello"*100, "jello"*100, 1000).should equal(100) }
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "long strings" do
|
33
|
+
specify{ Vladlev.distance("hello"*200, "jello"*200).should equal(200) }
|
34
|
+
specify{ Vladlev.distance("hello"*500, "jello"*500).should equal(500) }
|
35
|
+
specify{ Vladlev.distance("hello"*750, "jello"*750).should equal(750) }
|
36
|
+
specify{ Vladlev.distance("hello"*950, "jello"*950).should equal(950) }
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "threshold long strings" do
|
40
|
+
specify{ Vladlev.distance("hello"*2000, "jello"*2000, 10).should equal(5*2000) }
|
41
|
+
specify{ Vladlev.distance("hello"*5000, "jello"*5000, 10).should equal(5*5000) }
|
42
|
+
specify{ Vladlev.distance("hello"*7500, "jello"*7500, 10).should equal(5*7500) }
|
43
|
+
specify{ Vladlev.distance("hello"*9500, "jello"*9500, 10).should equal(5*9500) }
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "special chars" do
|
47
|
+
specify{ Vladlev.distance("*&^%$", "").should equal(5) }
|
48
|
+
specify{ Vladlev.distance("", ",./>?").should equal(5) }
|
49
|
+
specify{ Vladlev.distance('*&^%$+_=-)(*&^%$#@!~123456789', '*&^%$+_=-)(*&^%$#@!~').should equal(9) }
|
50
|
+
specify{ Vladlev.distance('*&^%$+_=-)(*&^%$#@!~', "").should equal(20) }
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "normalized distance" do
|
54
|
+
specify{ expect(Vladlev.get_normalized_distance("hi", "high", 1)).to eq(1.0) }
|
55
|
+
specify{ expect(Vladlev.get_normalized_distance("hi", "high")).to eq(0.5) }
|
56
|
+
specify{ expect(Vladlev.get_normalized_distance("hello", "hello")).to eq(0.0) }
|
57
|
+
specify{ expect(Vladlev.get_normalized_distance("goodnight", "goodnite")).to eq(0.3333333432674408) }
|
58
|
+
specify{ expect(Vladlev.get_normalized_distance("", "goodbye")).to eq(1.0) }
|
59
|
+
specify{ expect(Vladlev.get_normalized_distance("goodbye", "")).to eq(1.0) }
|
60
|
+
specify{ expect(Vladlev.get_normalized_distance("", "")).to eq(0.0) }
|
61
|
+
end
|
62
|
+
|
63
|
+
describe '#distance' do
|
64
|
+
context "when given two strings to match" do
|
65
|
+
it "returns the distance" do
|
66
|
+
string1 = "lorem ipsum"
|
67
|
+
string2 = "borem ipsum"
|
68
|
+
described_class.distance(string1, string2).should eq(1)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/vladlev.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require 'vladlev/version'
|
4
|
+
|
5
|
+
# Gem specification for vladlev. On JRuby the gem is built for the java
# platform; elsewhere it builds the C extension and depends on ffi at runtime.
Gem::Specification.new do |s|
  s.name = 'vladlev'
  s.version = Vladlev::VERSION
  s.summary = 'Levenshtein matching algorithm for ruby using C'
  s.description = 'Levenshtein matching algorithm for ruby using C with an FFI extension'
  s.authors = ["Brian Stien"]
  s.email = 'dev@moneydesktop.com'
  s.homepage = 'https://github.com/mxenabled/vladlev'

  if defined?(JRUBY_VERSION)
    s.platform = 'java'
  else
    s.extensions = ['ext/levenshtein/extconf.rb']
    s.add_runtime_dependency 'ffi'
  end

  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'rspec-pride'
  s.add_development_dependency 'pry'
  s.add_development_dependency 'geminabox'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'rake-compiler'

  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  # Assignment, not subtraction: `s.require_paths - ["lib"]` computed a value and discarded it.
  s.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vladlev
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brian Stien
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-11-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec-pride
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: geminabox
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: simplecov
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rake-compiler
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
description: Levenshtein matching algorithm for ruby using C with an FFI extension
|
126
|
+
email: dev@moneydesktop.com
|
127
|
+
executables: []
|
128
|
+
extensions:
|
129
|
+
- ext/levenshtein/extconf.rb
|
130
|
+
extra_rdoc_files: []
|
131
|
+
files:
|
132
|
+
- ".gitignore"
|
133
|
+
- ".rspec"
|
134
|
+
- Gemfile
|
135
|
+
- README.md
|
136
|
+
- Rakefile
|
137
|
+
- ext/.DS_Store
|
138
|
+
- ext/levenshtein/LevenshteinDistance.java
|
139
|
+
- ext/levenshtein/extconf.rb
|
140
|
+
- ext/levenshtein/levenshtein.c
|
141
|
+
- lib/levenshtein.bundle
|
142
|
+
- lib/levenshtein.jar
|
143
|
+
- lib/vladlev.rb
|
144
|
+
- lib/vladlev/levenshtein.rb
|
145
|
+
- lib/vladlev/version.rb
|
146
|
+
- spec/lib/levenshtein_spec.rb
|
147
|
+
- spec/spec_helper.rb
|
148
|
+
- vladlev.gemspec
|
149
|
+
homepage: https://github.com/mxenabled/vladlev
|
150
|
+
licenses: []
|
151
|
+
metadata: {}
|
152
|
+
post_install_message:
|
153
|
+
rdoc_options: []
|
154
|
+
require_paths:
|
155
|
+
- lib
|
156
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
157
|
+
requirements:
|
158
|
+
- - ">="
|
159
|
+
- !ruby/object:Gem::Version
|
160
|
+
version: '0'
|
161
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ">="
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
requirements: []
|
167
|
+
rubyforge_project:
|
168
|
+
rubygems_version: 2.4.8
|
169
|
+
signing_key:
|
170
|
+
specification_version: 4
|
171
|
+
summary: Levenshtein matching algorithm for ruby using C
|
172
|
+
test_files:
|
173
|
+
- spec/lib/levenshtein_spec.rb
|
174
|
+
- spec/spec_helper.rb
|