phonetics 1.5.3 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/ext/c_levenshtein/levenshtein.c +21 -15
- data/lib/phonetics/ruby_levenshtein.rb +3 -8
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8f26a3fc9c4a6729a0ddd04beee049efc4d1c0e8e2b887cfd14974de71d989a
|
4
|
+
data.tar.gz: 4449ecff87444214065a0f964c2cd7a0ba2ce93f52b3cbeae14e4777afcba3df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4309942350149685324fa56a9c642f9456c7abe1993ab7f0801fbf56883afc4e31750e969e6d49e3da0ecd7b16a8d93b9212f5469ddc1dcd4c86699d0f48cd46
|
7
|
+
data.tar.gz: 8007ae8ef77e2d9e5f055fc597c4c5ffbd9ee00007d9d31ed4d06ae96e9980bc2dfc92149c96bd94098faa7d0670c5d7cbd13a30f45acd8d8c6000853692f341
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.5.3
|
1
|
+
1.5.4
|
@@ -55,9 +55,9 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
55
55
|
string1[i] = NUM2INT(string1_ruby[i]);
|
56
56
|
debug("string1[%d] = %d\n", i, string1[i]);
|
57
57
|
}
|
58
|
-
for (
|
59
|
-
string2[
|
60
|
-
debug("string2[%d] = %d\n", i, string2[
|
58
|
+
for (j = 0; j < string2_length; j++) {
|
59
|
+
string2[j] = NUM2INT(string2_ruby[j]);
|
60
|
+
debug("string2[%d] = %d\n", i, string2[j]);
|
61
61
|
}
|
62
62
|
|
63
63
|
// one-dimensional representation of 2 dimensional array len(string1)+1 *
|
@@ -81,8 +81,8 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
81
81
|
// phonetic edit distance for that matrix cell.
|
82
82
|
// (Skipping i=0 and j=0 because set_initial filled in all cells where i
|
83
83
|
// or j are zero-valued)
|
84
|
-
for(j = 1; j <= string2_length; j++){
|
85
|
-
for(i = 1; i <= string1_length; i++){
|
84
|
+
for (j = 1; j <= string2_length; j++){
|
85
|
+
for (i = 1; i <= string1_length; i++){
|
86
86
|
|
87
87
|
// The cost of deletion or addition is the Levenshtein distance
|
88
88
|
// calculation (the value in the cell to the left, upper-left, or above)
|
@@ -141,24 +141,22 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
141
141
|
// "aek" -> [0.0, 1.0, 1.61, 2.61]
|
142
142
|
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
|
143
143
|
|
144
|
-
double
|
144
|
+
double initial_distance;
|
145
145
|
int i, j;
|
146
146
|
|
147
147
|
if (string1_length == 0 || string2_length == 0) {
|
148
|
-
|
149
|
-
} else if (string1[0] == string2[0]) {
|
150
|
-
distance_between_first_phonemes = 0.0;
|
148
|
+
initial_distance = 0.0;
|
151
149
|
} else {
|
152
|
-
|
150
|
+
initial_distance = 1.0;
|
153
151
|
}
|
154
152
|
|
153
|
+
// The top-left is 0, the cell to the right and down are each 1 to start
|
155
154
|
d[0] = (double) 0.0;
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
// And of string2 (maps to cell x=0, y=1)
|
155
|
+
if (string1_length > 0) {
|
156
|
+
d[1] = initial_distance;
|
157
|
+
}
|
160
158
|
if (string2_length > 0) {
|
161
|
-
d[string1_length+1] =
|
159
|
+
d[string1_length+1] = initial_distance;
|
162
160
|
}
|
163
161
|
|
164
162
|
debug("string1 length: %d\n", string1_length);
|
@@ -172,6 +170,14 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
|
|
172
170
|
// The same exact pattern down the left side of the matrix
|
173
171
|
d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]);
|
174
172
|
}
|
173
|
+
|
174
|
+
// And zero out the rest. If you're reading this please edit this to be
|
175
|
+
// faster.
|
176
|
+
for (j=1; j <= string2_length; j++) {
|
177
|
+
for (i=1; i <= string1_length; i++) {
|
178
|
+
d[j * (string1_length+1) + i] = (double) 0.0;
|
179
|
+
}
|
180
|
+
}
|
175
181
|
}
|
176
182
|
|
177
183
|
// A handy visualization for developers
|
@@ -121,18 +121,13 @@ module Phonetics
|
|
121
121
|
# Set the minimum scores equal to the distance between each phoneme,
|
122
122
|
# sequentially.
|
123
123
|
#
|
124
|
-
# The first value is always zero.
|
125
|
-
# The second value is always the phonetic distance between the first
|
126
|
-
# phonemes of each string.
|
124
|
+
# The first value is always zero, the second is always 1.
|
127
125
|
# Subsequent values are the cumulative phonetic distance between each
|
128
126
|
# phoneme within the same string.
|
129
127
|
# "aek" -> [0, 1, 1.61, 2.61]
|
130
128
|
def initial_distances(str1, str2)
|
131
|
-
starting_distance =
|
132
|
-
|
133
|
-
else
|
134
|
-
Phonetics.distance(str1[0], str2[0])
|
135
|
-
end
|
129
|
+
starting_distance = 1
|
130
|
+
starting_distance = 0 if len1 == 0 || len2 == 0
|
136
131
|
|
137
132
|
distances1 = (1..(str1.length - 1)).reduce([0, starting_distance]) do |acc, i|
|
138
133
|
acc << acc.last + Phonetics.distance(str1[i - 1], str1[i])
|