phonetics 1.5.3 → 1.5.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/ext/c_levenshtein/levenshtein.c +21 -15
- data/lib/phonetics/ruby_levenshtein.rb +3 -8
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8f26a3fc9c4a6729a0ddd04beee049efc4d1c0e8e2b887cfd14974de71d989a
|
4
|
+
data.tar.gz: 4449ecff87444214065a0f964c2cd7a0ba2ce93f52b3cbeae14e4777afcba3df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4309942350149685324fa56a9c642f9456c7abe1993ab7f0801fbf56883afc4e31750e969e6d49e3da0ecd7b16a8d93b9212f5469ddc1dcd4c86699d0f48cd46
|
7
|
+
data.tar.gz: 8007ae8ef77e2d9e5f055fc597c4c5ffbd9ee00007d9d31ed4d06ae96e9980bc2dfc92149c96bd94098faa7d0670c5d7cbd13a30f45acd8d8c6000853692f341
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.5.
|
1
|
+
1.5.4
|
@@ -55,9 +55,9 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
55
55
|
string1[i] = NUM2INT(string1_ruby[i]);
|
56
56
|
debug("string1[%d] = %d\n", i, string1[i]);
|
57
57
|
}
|
58
|
-
for (
|
59
|
-
string2[
|
60
|
-
debug("string2[%d] = %d\n", i, string2[
|
58
|
+
for (j = 0; j < string2_length; j++) {
|
59
|
+
string2[j] = NUM2INT(string2_ruby[j]);
|
60
|
+
debug("string2[%d] = %d\n", i, string2[j]);
|
61
61
|
}
|
62
62
|
|
63
63
|
// one-dimensional representation of 2-dimensional array len(string1)+1 *
|
@@ -81,8 +81,8 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
81
81
|
// phonetic edit distance for that matrix cell.
|
82
82
|
// (Skipping i=0 and j=0 because set_initial filled in all cells where i
|
83
83
|
// or j are zero-valued)
|
84
|
-
for(j = 1; j <= string2_length; j++){
|
85
|
-
for(i = 1; i <= string1_length; i++){
|
84
|
+
for (j = 1; j <= string2_length; j++){
|
85
|
+
for (i = 1; i <= string1_length; i++){
|
86
86
|
|
87
87
|
// The cost of deletion or addition is the Levenshtein distance
|
88
88
|
// calculation (the value in the cell to the left, upper-left, or above)
|
@@ -141,24 +141,22 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
141
141
|
// "aek" -> [0.0, 1.0, 1.61, 2.61]
|
142
142
|
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
|
143
143
|
|
144
|
-
double
|
144
|
+
double initial_distance;
|
145
145
|
int i, j;
|
146
146
|
|
147
147
|
if (string1_length == 0 || string2_length == 0) {
|
148
|
-
|
149
|
-
} else if (string1[0] == string2[0]) {
|
150
|
-
distance_between_first_phonemes = 0.0;
|
148
|
+
initial_distance = 0.0;
|
151
149
|
} else {
|
152
|
-
|
150
|
+
initial_distance = 1.0;
|
153
151
|
}
|
154
152
|
|
153
|
+
// The top-left is 0; the cells to its right and below it each start at initial_distance (1, or 0 if either string is empty)
|
155
154
|
d[0] = (double) 0.0;
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
// And of string2 (maps to cell x=0, y=1)
|
155
|
+
if (string1_length > 0) {
|
156
|
+
d[1] = initial_distance;
|
157
|
+
}
|
160
158
|
if (string2_length > 0) {
|
161
|
-
d[string1_length+1] =
|
159
|
+
d[string1_length+1] = initial_distance;
|
162
160
|
}
|
163
161
|
|
164
162
|
debug("string1 length: %d\n", string1_length);
|
@@ -172,6 +170,14 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
|
|
172
170
|
// The same exact pattern down the left side of the matrix
|
173
171
|
d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]);
|
174
172
|
}
|
173
|
+
|
174
|
+
// And zero out the rest. If you're reading this please edit this to be
|
175
|
+
// faster.
|
176
|
+
for (j=1; j <= string2_length; j++) {
|
177
|
+
for (i=1; i <= string1_length; i++) {
|
178
|
+
d[j * (string1_length+1) + i] = (double) 0.0;
|
179
|
+
}
|
180
|
+
}
|
175
181
|
}
|
176
182
|
|
177
183
|
// A handy visualization for developers
|
@@ -121,18 +121,13 @@ module Phonetics
|
|
121
121
|
# Set the minimum scores equal to the distance between each phoneme,
|
122
122
|
# sequentially.
|
123
123
|
#
|
124
|
-
# The first value is always zero.
|
125
|
-
# The second value is always the phonetic distance between the first
|
126
|
-
# phonemes of each string.
|
124
|
+
# The first value is always zero; the second is 1 (or 0 if either string is empty).
|
127
125
|
# Subsequent values are the cumulative phonetic distance between each
|
128
126
|
# phoneme within the same string.
|
129
127
|
# "aek" -> [0, 1, 1.61, 2.61]
|
130
128
|
def initial_distances(str1, str2)
|
131
|
-
starting_distance =
|
132
|
-
|
133
|
-
else
|
134
|
-
Phonetics.distance(str1[0], str2[0])
|
135
|
-
end
|
129
|
+
starting_distance = 1
|
130
|
+
starting_distance = 0 if len1 == 0 || len2 == 0
|
136
131
|
|
137
132
|
distances1 = (1..(str1.length - 1)).reduce([0, starting_distance]) do |acc, i|
|
138
133
|
acc << acc.last + Phonetics.distance(str1[i - 1], str1[i])
|