mini-levenshtein 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/ext/mini_levenshtein/extconf.rb +1 -0
- data/ext/mini_levenshtein/mini_levenshtein.c +91 -14
- data/lib/mini-levenshtein/version.rb +1 -1
- data/lib/mini-levenshtein.rb +0 -54
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87d8dfd831b540e20d080df316abaf2195f371822d68e819e5c717fd2cbb637d
|
4
|
+
data.tar.gz: acd26d48b08feeaecea2ff7b9e4afe7f751e6590dafd54c87971fec244d79e28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a380bcd7f95a4bc0ae68a871a40c0ec18beab58ef5af6bcc3313de38bd836f1e0e40538e491c9c0be46ba7882f25607b747cce8b579654d58cbac55303e67f1
|
7
|
+
data.tar.gz: f46268d96b7dfb651bb42db3d2f509bb5e2bdb908375832ee0a12a21ce388461cf52434156e042c8ba201c24041451dc03dd74d17b43cf3d2efec913089472e1
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -5,9 +5,9 @@ Simple, fast, levenshtein distance
|
|
5
5
|
# Synopsis
|
6
6
|
|
7
7
|
```ruby
|
8
|
-
MiniLevenshtein.
|
8
|
+
MiniLevenshtein.edit_distance("levenshtein", "levenstien") # => 3
|
9
9
|
|
10
|
-
MiniLevenshtein.
|
10
|
+
MiniLevenshtein.similarity("levenshtein", "levenstien") # => 0.857142...
|
11
11
|
```
|
12
12
|
|
13
13
|
# Credits
|
@@ -1,23 +1,20 @@
|
|
1
|
-
#include <
|
2
|
-
#include
|
3
|
-
#include <wchar.h>
|
4
|
-
#include <stdlib.h>
|
1
|
+
#include <ruby.h>
|
2
|
+
#include "extconf.h"
|
5
3
|
|
6
|
-
#include "ruby.h"
|
7
4
|
#include "levenshtein.h"
|
8
5
|
|
9
|
-
VALUE
|
6
|
+
VALUE MiniLevenshtein = Qnil;
|
10
7
|
|
11
|
-
VALUE
|
8
|
+
VALUE rb_lev_edit_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost);
|
9
|
+
VALUE rb_edit_distance(VALUE self, VALUE s1, VALUE s2);
|
10
|
+
VALUE rb_similarity(VALUE self, VALUE s1, VALUE s2);
|
12
11
|
|
13
|
-
|
14
|
-
{
|
15
|
-
MiniLevenshteinInternal = rb_define_module("MiniLevenshteinInternal");
|
16
|
-
rb_define_method(MiniLevenshteinInternal, "internal_distance", method_internal_distance, 3);
|
17
|
-
}
|
18
|
-
|
19
|
-
VALUE method_internal_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost)
|
12
|
+
VALUE rb_lev_edit_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost)
|
20
13
|
{
|
14
|
+
Check_Type(s1, T_STRING);
|
15
|
+
Check_Type(s2, T_STRING);
|
16
|
+
Check_Type(xcost, T_FIXNUM);
|
17
|
+
|
21
18
|
size_t len1 = RSTRING_LEN(s1);
|
22
19
|
size_t len2 = RSTRING_LEN(s2);
|
23
20
|
|
@@ -30,3 +27,83 @@ VALUE method_internal_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost)
|
|
30
27
|
|
31
28
|
return INT2NUM(distance);
|
32
29
|
}
|
30
|
+
|
31
|
+
/*
|
32
|
+
* Compute absolute Levenshtein distance of two strings.
|
33
|
+
*
|
34
|
+
* @example it's hard to spell Levenshtein correctly
|
35
|
+
* edit_distance('Levenshtein', 'Lenvinsten') # => 4
|
36
|
+
* edit_distance('Levenshtein', 'Levensthein') # => 2
|
37
|
+
* edit_distance('Levenshtein', 'Levenshten') # => 1
|
38
|
+
* edit_distance('Levenshtein', 'Levenshtein') # => 0
|
39
|
+
* "Yeah, we've managed it at last."
|
40
|
+
*
|
41
|
+
* @param string1 [String]
|
42
|
+
* @param string2 [String]
|
43
|
+
*
|
44
|
+
* @return [Integer]
|
45
|
+
*/
|
46
|
+
VALUE rb_edit_distance(VALUE self, VALUE string1, VALUE string2)
|
47
|
+
{
|
48
|
+
Check_Type(string1, T_STRING);
|
49
|
+
Check_Type(string2, T_STRING);
|
50
|
+
|
51
|
+
size_t len1 = RSTRING_LEN(string1);
|
52
|
+
size_t len2 = RSTRING_LEN(string2);
|
53
|
+
|
54
|
+
const lev_byte *str1 = StringValuePtr(string1);
|
55
|
+
const lev_byte *str2 = StringValuePtr(string2);
|
56
|
+
|
57
|
+
long distance = lev_edit_distance(len1, str1, len2, str2, 0);
|
58
|
+
|
59
|
+
return INT2NUM(distance);
|
60
|
+
}
|
61
|
+
|
62
|
+
/*
|
63
|
+
* Compute similarity of two strings.
|
64
|
+
*
|
65
|
+
* The similarity is a number between 0 and 1.
|
66
|
+
*
|
67
|
+
* @example
|
68
|
+
* similarity('Hello World!', 'Holly grail!') # => 0.5833...
|
69
|
+
*
|
70
|
+
* @example
|
71
|
+
* similarity('Holmes', 'Jack') # => 0.0
|
72
|
+
*
|
73
|
+
* @param string1 [String]
|
74
|
+
* @param string2 [String]
|
75
|
+
*
|
76
|
+
* @return [Float]
|
77
|
+
*/
|
78
|
+
VALUE rb_similarity(VALUE self, VALUE string1, VALUE string2)
|
79
|
+
{
|
80
|
+
Check_Type(string1, T_STRING);
|
81
|
+
Check_Type(string2, T_STRING);
|
82
|
+
|
83
|
+
size_t len1 = RSTRING_LEN(string1);
|
84
|
+
size_t len2 = RSTRING_LEN(string2);
|
85
|
+
|
86
|
+
const lev_byte *str1 = StringValuePtr(string1);
|
87
|
+
const lev_byte *str2 = StringValuePtr(string2);
|
88
|
+
|
89
|
+
size_t lensum = len1 + len2;
|
90
|
+
if (lensum == 0) {
|
91
|
+
return DBL2NUM(1.0);
|
92
|
+
}
|
93
|
+
|
94
|
+
long distance = lev_edit_distance(len1, str1, len2, str2, 0);
|
95
|
+
if (distance == 0) {
|
96
|
+
return DBL2NUM(1.0);
|
97
|
+
}
|
98
|
+
|
99
|
+
return DBL2NUM((lensum - distance) / (double)lensum);
|
100
|
+
}
|
101
|
+
|
102
|
+
void Init_mini_levenshtein()
|
103
|
+
{
|
104
|
+
MiniLevenshtein = rb_define_module("MiniLevenshtein");
|
105
|
+
rb_define_singleton_method(MiniLevenshtein, "lev_edit_distance", rb_lev_edit_distance, 3);
|
106
|
+
rb_define_singleton_method(MiniLevenshtein, "edit_distance", rb_edit_distance, 2);
|
107
|
+
rb_define_singleton_method(MiniLevenshtein, "similarity", rb_similarity, 2);
|
108
|
+
}
|
109
|
+
|
data/lib/mini-levenshtein.rb
CHANGED
@@ -1,62 +1,8 @@
|
|
1
1
|
require 'mini-levenshtein/mini_levenshtein'
|
2
2
|
|
3
3
|
module MiniLevenshtein
|
4
|
-
extend MiniLevenshteinInternal
|
5
|
-
|
6
4
|
class << self
|
7
|
-
# Compute absolute Levenshtein distance of two strings.
|
8
|
-
#
|
9
|
-
# @example (it's hard to spell Levenshtein correctly):
|
10
|
-
# distance('Levenshtein', 'Lenvinsten') # => 4
|
11
|
-
# distance('Levenshtein', 'Levensthein') # => 2
|
12
|
-
# distance('Levenshtein', 'Levenshten') # => 1
|
13
|
-
# distance('Levenshtein', 'Levenshtein') # => 0
|
14
|
-
# "Yeah, we've managed it at last."
|
15
|
-
#
|
16
|
-
# @param string1 [String]
|
17
|
-
# @param string2 [String]
|
18
|
-
#
|
19
|
-
# @return [Integer]
|
20
|
-
def edit_distance(string1, string2)
|
21
|
-
validate_string!(string1)
|
22
|
-
validate_string!(string2)
|
23
|
-
|
24
|
-
internal_distance(string1, string2, 0)
|
25
|
-
end
|
26
5
|
alias distance edit_distance
|
27
|
-
|
28
|
-
# Compute similarity of two strings.
|
29
|
-
#
|
30
|
-
# The similarity is a number between 0 and 1.
|
31
|
-
#
|
32
|
-
# @example
|
33
|
-
# similarity('Hello World!', 'Holly grail!') # => 0.5833...
|
34
|
-
#
|
35
|
-
# @example
|
36
|
-
# similarity('Holmes', 'Jack') # => 0.0
|
37
|
-
#
|
38
|
-
# @param string1 [String]
|
39
|
-
# @param string2 [String]
|
40
|
-
#
|
41
|
-
# @return [Float]
|
42
|
-
def similarity(string1, string2)
|
43
|
-
validate_string!(string1)
|
44
|
-
validate_string!(string2)
|
45
|
-
|
46
|
-
lensum = string1.length + string2.length
|
47
|
-
return 1.0 if lensum.zero?
|
48
|
-
|
49
|
-
distance = internal_distance(string1, string2, 1)
|
50
|
-
return 1.0 if distance.zero?
|
51
|
-
|
52
|
-
(lensum - distance) / lensum.to_f
|
53
|
-
end
|
54
6
|
alias ratio similarity
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
def validate_string!(string)
|
59
|
-
raise TypeError, "no implicit conversion of #{string.class} to String" unless string.is_a?(String)
|
60
|
-
end
|
61
7
|
end
|
62
8
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mini-levenshtein
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Delon Newman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Simple, fast, levenshtein distance
|
14
14
|
email: contact@delonnewman.name
|