phonetics 3.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +17 -2
- data/Cargo.toml +27 -0
- data/Rakefile +58 -26
- data/VERSION +1 -1
- data/bin/phonetics +89 -0
- data/ext/phonetics_ruby/Cargo.toml +36 -0
- data/ext/phonetics_ruby/build.rs +24 -0
- data/ext/phonetics_ruby/extconf.rb +17 -0
- data/ext/phonetics_ruby/src/lib.rs +56 -0
- data/ext/phonetics_ruby/vendor/phonetics/Cargo.toml +30 -0
- data/ext/phonetics_ruby/vendor/phonetics/README.md +29 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/compounds.rs +40 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/confusion.rs +325 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/consonants.rs +363 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/cross_class.rs +56 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/diacritics.rs +113 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/distance.rs +183 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/levenshtein.rs +146 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/lib.rs +44 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/symbols.rs +21 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/tokenizer.rs +171 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/vowels.rs +197 -0
- data/lib/phonetics.rb +77 -2
- data/phonetics.gemspec +33 -9
- metadata +45 -34
- data/.github/workflows/gempush.yml +0 -28
- data/.github/workflows/test.yml +0 -20
- data/Makefile +0 -9
- data/ext/c_levenshtein/extconf.rb +0 -10
- data/ext/c_levenshtein/levenshtein.c +0 -223
- data/ext/c_levenshtein/next_phoneme_length.c +0 -1365
- data/ext/c_levenshtein/next_phoneme_length.h +0 -1
- data/ext/c_levenshtein/phonemes.c +0 -53
- data/ext/c_levenshtein/phonemes.h +0 -3
- data/ext/c_levenshtein/phonetic_cost.c +0 -88593
- data/ext/c_levenshtein/phonetic_cost.h +0 -1
- data/lib/phonetics/code_generator.rb +0 -228
- data/lib/phonetics/distances.rb +0 -249
- data/lib/phonetics/levenshtein.rb +0 -27
- data/lib/phonetics/ruby_levenshtein.rb +0 -162
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
//! Listener-confusion distance — Gotoh affine-gap DP over IPA tokens,
|
|
2
|
+
//! with weak-phoneme indel discount, boundary-token discount, and an
|
|
3
|
+
//! empirical-confusion overlay on top of the acoustic substitution
|
|
4
|
+
//! cost.
|
|
5
|
+
//!
|
|
6
|
+
//! Where [`crate::levenshtein`] answers "how different are these two
|
|
7
|
+
//! waveforms?", this answers "will a listener parse one as the other?".
|
|
8
|
+
//! Calibrated against Mad Gab puzzle data and against the maintainer's
|
|
9
|
+
//! West Coast American English. The overlay is a hand-curated table;
|
|
10
|
+
//! the DP shape is from Gotoh (1982).
|
|
11
|
+
//!
|
|
12
|
+
//! Three matrices:
|
|
13
|
+
//!
|
|
14
|
+
//! M[i][j] best score ending in a match/substitution
|
|
15
|
+
//! X[i][j] best score ending in an a-consuming gap
|
|
16
|
+
//! Y[i][j] best score ending in a b-consuming gap
|
|
17
|
+
//!
|
|
18
|
+
//! Affine gap pricing: opening a gap costs `GAP_OPEN`, extending one
|
|
19
|
+
//! already open costs `GAP_EXTEND` < `GAP_OPEN`. Mad Gab clues
|
|
20
|
+
//! typically add filler phonemes at word boundaries; one long gap
|
|
21
|
+
//! costs `GAP_OPEN + (k-1) * GAP_EXTEND`, not `k * GAP_OPEN`.
|
|
22
|
+
|
|
23
|
+
use crate::{distance as phoneme_distance, symbols, tokenizer};
|
|
24
|
+
|
|
25
|
+
// ------------------------------------------------------------------
|
|
26
|
+
// Tuning constants
|
|
27
|
+
// ------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
/// Price charged for the first phoneme of a new gap.
pub const GAP_OPEN: f64 = 0.60;

/// Price charged for each additional phoneme appended to a gap that is
/// already open.
pub const GAP_EXTEND: f64 = 0.25;

/// Flat indel price for the "weak" phonemes in [`WEAK_PHONEMES`] —
/// /ə/, /h/, /ʔ/, /ɦ/ — which casual English routinely inserts, drops,
/// or hallucinates.
pub const WEAK_INDEL_COST: f64 = 0.15;

/// Flat indel price for the word-boundary token. Kept below
/// [`WEAK_INDEL_COST`]: re-syllabification is exactly the operation
/// Mad Gab encodes, so it should go almost unpunished.
pub const BOUNDARY_INDEL_COST: f64 = 0.02;

/// The four "weak" phonemes — the segments with the highest measured
/// deletion/insertion rates in conversational English.
pub const WEAK_PHONEMES: &[&str] = &["ə", "h", "ʔ", "ɦ"];
|
|
47
|
+
|
|
48
|
+
/// True if `phoneme` is in the weak tier OR is the boundary token.
|
|
49
|
+
pub fn weak(phoneme: &str) -> bool {
|
|
50
|
+
phoneme == symbols::BOUNDARY_TOKEN || WEAK_PHONEMES.contains(&phoneme)
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/// Tier-appropriate indel cost for `phoneme`, or `None` if it should
|
|
54
|
+
/// fall back to the affine GAP_OPEN/GAP_EXTEND machinery.
|
|
55
|
+
pub fn weak_indel_cost(phoneme: &str) -> Option<f64> {
|
|
56
|
+
if phoneme == symbols::BOUNDARY_TOKEN {
|
|
57
|
+
Some(BOUNDARY_INDEL_COST)
|
|
58
|
+
} else if WEAK_PHONEMES.contains(&phoneme) {
|
|
59
|
+
Some(WEAK_INDEL_COST)
|
|
60
|
+
} else {
|
|
61
|
+
None
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// ------------------------------------------------------------------
|
|
66
|
+
// Empirical-confusion overlay
|
|
67
|
+
// ------------------------------------------------------------------
|
|
68
|
+
//
|
|
69
|
+
// Pairs whose acoustic distance under `crate::distance` overstates the
|
|
70
|
+
// perceptual gap. The table mixes cross-variety findings (Miller-Nicely
|
|
71
|
+
// 1955, generic English speech-perception studies) with American-
|
|
72
|
+
// English-specific mergers — most of the overlay is calibrated against
|
|
73
|
+
// a West Coast American baseline because that's the dialect the
|
|
74
|
+
// maintainer hears natively.
|
|
75
|
+
|
|
76
|
+
/// Hand-curated perceptual overrides as `(sym_a, sym_b, perceptual_cost)`.
/// Pairs are unordered: the lookup matches a row in either orientation,
/// so sub_cost(a, b) == sub_cost(b, a).
const OVERLAY: &[(&str, &str, f64)] = &[
    // Cross-variety
    ("θ", "t", 0.18),
    ("ð", "d", 0.18),
    ("θ", "s", 0.12),
    ("ð", "z", 0.12),
    ("p", "f", 0.20),
    ("b", "v", 0.20),
    ("t", "s", 0.20),
    ("d", "z", 0.20),
    ("l", "ɹ", 0.15),
    // American (esp. WCE)
    ("t", "ɾ", 0.10),
    ("d", "ɾ", 0.05),
    ("ɑ", "ɔ", 0.05),
    ("ɑ", "ɒ", 0.05),
    ("t", "ʔ", 0.08),
    ("d", "ʔ", 0.20),
    ("u", "y", 0.15),
    ("u", "ɯ", 0.15),
    ("u", "ʉ", 0.10),
    ("o", "ə", 0.20),
    ("ʌ", "ɑ", 0.10),
];

/// Looks up the overlay price for the unordered pair `{a, b}`.
///
/// Scans [`OVERLAY`] front to back and returns the cost of the first row
/// that matches in either orientation, or `None` when the pair is absent.
fn overlay_cost(a: &str, b: &str) -> Option<f64> {
    OVERLAY
        .iter()
        .find(|&&(x, y, _)| (x == a && y == b) || (x == b && y == a))
        .map(|&(_, _, cost)| cost)
}
|
|
112
|
+
|
|
113
|
+
/// Per-phoneme substitution cost used by the Confusion DP. Identity
|
|
114
|
+
/// short-circuits; then the overlay; then the acoustic metric.
|
|
115
|
+
pub fn sub_cost(a: &str, b: &str) -> f64 {
|
|
116
|
+
if a == b {
|
|
117
|
+
return 0.0;
|
|
118
|
+
}
|
|
119
|
+
if let Some(c) = overlay_cost(a, b) {
|
|
120
|
+
return c;
|
|
121
|
+
}
|
|
122
|
+
phoneme_distance(a, b)
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// ------------------------------------------------------------------
|
|
126
|
+
// Gotoh DP
|
|
127
|
+
// ------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
/// Listener-confusion distance between two IPA strings.
|
|
130
|
+
pub fn distance(s1: &str, s2: &str) -> f64 {
|
|
131
|
+
let a = tokenizer::tokens(s1, true);
|
|
132
|
+
let b = tokenizer::tokens(s2, true);
|
|
133
|
+
distance_from_tokens(&a, &b)
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/// 0..1 normalised similarity score. Worst case is one substitution
|
|
137
|
+
/// per position in the longer string; dividing by max(len) gives a
|
|
138
|
+
/// bounded judgement comparable across phrase lengths.
|
|
139
|
+
pub fn similarity(s1: &str, s2: &str) -> f64 {
|
|
140
|
+
let a = tokenizer::tokens(s1, true);
|
|
141
|
+
let b = tokenizer::tokens(s2, true);
|
|
142
|
+
let max_n = a.len().max(b.len());
|
|
143
|
+
if max_n == 0 {
|
|
144
|
+
return 1.0;
|
|
145
|
+
}
|
|
146
|
+
let d = distance_from_tokens(&a, &b);
|
|
147
|
+
(1.0 - d / max_n as f64).max(0.0)
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/// Confusion distance over pre-tokenised phoneme sequences.
|
|
151
|
+
#[allow(clippy::too_many_lines)]
|
|
152
|
+
pub fn distance_from_tokens<S: AsRef<str>>(a: &[S], b: &[S]) -> f64 {
|
|
153
|
+
let m = a.len();
|
|
154
|
+
let n = b.len();
|
|
155
|
+
if m == 0 && n == 0 {
|
|
156
|
+
return 0.0;
|
|
157
|
+
}
|
|
158
|
+
if m == 0 {
|
|
159
|
+
return seed_cost(b);
|
|
160
|
+
}
|
|
161
|
+
if n == 0 {
|
|
162
|
+
return seed_cost(a);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
let width = n + 1;
|
|
166
|
+
let cells = (m + 1) * width;
|
|
167
|
+
let inf = INF;
|
|
168
|
+
let mut mm = vec![inf; cells];
|
|
169
|
+
let mut xx = vec![inf; cells];
|
|
170
|
+
let mut yy = vec![inf; cells];
|
|
171
|
+
mm[0] = 0.0;
|
|
172
|
+
|
|
173
|
+
// Seed gap-only edges.
|
|
174
|
+
for i in 1..=m {
|
|
175
|
+
let ph = a[i - 1].as_ref();
|
|
176
|
+
let step = indel_step(ph, i == 1);
|
|
177
|
+
let prev = if i == 1 { 0.0 } else { xx[(i - 1) * width] };
|
|
178
|
+
xx[i * width] = prev + step;
|
|
179
|
+
}
|
|
180
|
+
for j in 1..=n {
|
|
181
|
+
let ph = b[j - 1].as_ref();
|
|
182
|
+
let step = indel_step(ph, j == 1);
|
|
183
|
+
let prev = if j == 1 { 0.0 } else { yy[j - 1] };
|
|
184
|
+
yy[j] = prev + step;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
for i in 1..=m {
|
|
188
|
+
let ai = a[i - 1].as_ref();
|
|
189
|
+
let a_weak_cost = weak_indel_cost(ai);
|
|
190
|
+
for j in 1..=n {
|
|
191
|
+
let bj = b[j - 1].as_ref();
|
|
192
|
+
let b_weak_cost = weak_indel_cost(bj);
|
|
193
|
+
|
|
194
|
+
let here = i * width + j;
|
|
195
|
+
let up = (i - 1) * width + j;
|
|
196
|
+
let left = i * width + (j - 1);
|
|
197
|
+
let diag = (i - 1) * width + (j - 1);
|
|
198
|
+
|
|
199
|
+
// M: end in match/mismatch.
|
|
200
|
+
mm[here] = min3(mm[diag], xx[diag], yy[diag]) + sub_cost(ai, bj);
|
|
201
|
+
|
|
202
|
+
// X: end in an a-consuming gap.
|
|
203
|
+
xx[here] = if let Some(c) = a_weak_cost {
|
|
204
|
+
min3(mm[up], xx[up], yy[up]) + c
|
|
205
|
+
} else {
|
|
206
|
+
min3(mm[up] + GAP_OPEN, xx[up] + GAP_EXTEND, yy[up] + GAP_OPEN)
|
|
207
|
+
};
|
|
208
|
+
|
|
209
|
+
// Y: end in a b-consuming gap.
|
|
210
|
+
yy[here] = if let Some(c) = b_weak_cost {
|
|
211
|
+
min3(mm[left], xx[left], yy[left]) + c
|
|
212
|
+
} else {
|
|
213
|
+
min3(mm[left] + GAP_OPEN, yy[left] + GAP_EXTEND, xx[left] + GAP_OPEN)
|
|
214
|
+
};
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
let last = m * width + n;
|
|
219
|
+
min3(mm[last], xx[last], yy[last])
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// ------------------------------------------------------------------
|
|
223
|
+
// Helpers
|
|
224
|
+
// ------------------------------------------------------------------
|
|
225
|
+
|
|
226
|
+
/// Large finite sentinel used to pre-fill the DP matrices in
/// `distance_from_tokens`, so cells that were never assigned a real cost
/// lose every `min3` comparison.
const INF: f64 = 1e18;
|
|
227
|
+
|
|
228
|
+
/// Smallest of three floats, folded left to right with `f64::min`.
#[inline]
fn min3(a: f64, b: f64, c: f64) -> f64 {
    let smaller_of_first_two = f64::min(a, b);
    f64::min(smaller_of_first_two, c)
}
|
|
232
|
+
|
|
233
|
+
fn indel_step(phoneme: &str, opening: bool) -> f64 {
|
|
234
|
+
if let Some(c) = weak_indel_cost(phoneme) {
|
|
235
|
+
return c;
|
|
236
|
+
}
|
|
237
|
+
if opening { GAP_OPEN } else { GAP_EXTEND }
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
fn seed_cost<S: AsRef<str>>(tokens: &[S]) -> f64 {
|
|
241
|
+
let mut total = 0.0;
|
|
242
|
+
for (i, ph) in tokens.iter().enumerate() {
|
|
243
|
+
total += indel_step(ph.as_ref(), i == 0);
|
|
244
|
+
}
|
|
245
|
+
total
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for comparisons against reference values.
    const EPS: f64 = 1e-12;

    /// Pure-Ruby reference values (f64) as `(first, second, expected)`.
    const RUBY_REFERENCE: &[(&str, &str, f64)] = &[
        ("kæt", "kæt", 0.0),
        ("kæt", "kʌt", 0.145_085_455_502_268_37),
        (
            "ɪtsdʒʌstəstupɪdgeɪm",
            "hɪtsdʒʌstɪsduphɪdkeɪm",
            0.769_519_814_165_789_6,
        ),
        (
            "ɪtsdʒʌstəstupɪdgeɪm",
            "jɔrmʌðɝwɛrzsneɪkɝz",
            6.485_176_104_558_604,
        ),
        ("æpəlpaɪ", "eɪppʊlpaɪ", 1.047_133_181_946_413_6),
        ("nidəkɔfi", "nidɑkhɔffi", 0.968_204_431_378_303_3),
        ("aɪlʌvju", "aɪlʌvju", 0.0),
        ("ɪts dʒʌst", "ɪt sdʒʌst", 0.04),
        ("stupɪd", "stupɪdli", 0.85),
        ("stupɪd", "hstupɪd", 0.15),
        ("", "", 0.0),
        ("kæt", "", 1.1),
    ];

    #[test]
    fn matches_ruby_reference_distances() {
        for &(a, b, expected) in RUBY_REFERENCE {
            let got = distance(a, b);
            let drift = (got - expected).abs();
            assert!(
                drift < EPS,
                "distance({a:?}, {b:?}) = {got}, expected {expected}",
            );
        }
    }

    #[test]
    fn empirical_overlay_fires_on_t_flapping() {
        // /t/-/ɾ/ acoustically ~0.5; overlay drops it to 0.10.
        let flap = sub_cost("t", "ɾ");
        assert!((flap - 0.10).abs() < EPS);
    }

    #[test]
    fn wce_overlay_fires_on_cot_caught() {
        let merger = sub_cost("ɑ", "ɔ");
        assert!((merger - 0.05).abs() < EPS);
    }

    #[test]
    fn similarity_is_one_for_identity() {
        assert_eq!(similarity("kæt", "kæt"), 1.0);
        assert_eq!(similarity("", ""), 1.0);
    }

    #[test]
    fn similarity_separates_madgab_pair_from_decoy() {
        let target = "ɪtsdʒʌstəstupɪdgeɪm";
        let s_clue = similarity(target, "hɪtsdʒʌstɪsduphɪdkeɪm");
        let s_decoy = similarity(target, "jɔrmʌðɝwɛrzsneɪkɝz");
        assert!(
            s_clue - s_decoy >= 0.2,
            "clue {s_clue} not >= decoy {s_decoy} + 0.2"
        );
    }

    #[test]
    fn boundary_indel_is_essentially_free() {
        // Identical phoneme content; only the word boundary has moved, so
        // the confusion distance should stay near zero.
        let moved_boundary = distance("ɪts dʒʌst", "ɪt sdʒʌst");
        assert!(moved_boundary < 0.05);
    }

    #[test]
    fn weak_phoneme_indel_is_cheap() {
        // A leading /h/ insertion should cost roughly WEAK_INDEL_COST.
        let with_h = distance("stupɪd", "hstupɪd");
        assert!((with_h - WEAK_INDEL_COST).abs() < 0.01);
    }
}
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
//! Consonant distance: voicing flip + manner-of-articulation rank +
|
|
2
|
+
//! 2D place-of-articulation embedding + lateral airflow penalty.
|
|
3
|
+
//!
|
|
4
|
+
//! Place is a Euclidean distance over an anatomical (x, y) embedding,
|
|
5
|
+
//! not the original 1-D column index. Labio-velar /w/ sits at the back
|
|
6
|
+
//! on x but at the lip end on y because it's articulated at both lips
|
|
7
|
+
//! and velum; the 1-D index put /w/ next to bilabial /m/ and far from
|
|
8
|
+
//! velar /k/, which is the opposite of the physics.
|
|
9
|
+
//!
|
|
10
|
+
//! Lateral airflow is an additive penalty so /l/ vs /ɹ/ — same place,
|
|
11
|
+
//! same voicing, both ranked "approximant" — comes out non-zero.
|
|
12
|
+
|
|
13
|
+
/// Place of articulation.
// Variants carry the standard IPA place names, so per-variant docs are
// waived.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Position {
    LabioVelar,
    BiLabial,
    LabioDental,
    LinguoLabial,
    Dental,
    Alveolar,
    PostAlveolar,
    RetroFlex,
    Palatal,
    Velar,
    Uvular,
    Pharyngeal,
    Glottal,
}

impl Position {
    /// Anatomical 2D embedding of this place, each axis in [0, 1].
    ///
    /// * first component (x): front of the mouth (0) → back of the mouth (1)
    /// * second component (y): lip articulator (0) → tongue/throat
    ///   articulator (1)
    pub fn coords(self) -> (f64, f64) {
        use Position::*;

        let (front_to_back, lip_to_throat) = match self {
            LabioVelar => (0.95, 0.05),
            BiLabial => (0.00, 0.05),
            LabioDental => (0.10, 0.30),
            LinguoLabial => (0.05, 0.55),
            Dental => (0.20, 0.60),
            Alveolar => (0.30, 0.70),
            PostAlveolar => (0.40, 0.75),
            RetroFlex => (0.50, 0.80),
            Palatal => (0.60, 0.85),
            Velar => (0.80, 0.90),
            Uvular => (0.90, 0.95),
            Pharyngeal => (0.95, 1.00),
            Glottal => (1.00, 1.00),
        };
        (front_to_back, lip_to_throat)
    }
}
|
|
54
|
+
|
|
55
|
+
/// Manner of articulation.
// Variants carry the standard IPA manner names, so per-variant docs are
// waived.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Manner {
    Nasal,
    Stop,
    SibilantFricative,
    NonSibilantFricative,
    LateralFricative,
    Approximant,
    TapFlap,
    Trill,
    LateralApproximant,
    LateralTapFlap,
}

impl Manner {
    /// Perceptual rank of this manner in [0, 1] along the sonority
    /// hierarchy, from stops (0.0) up to approximants (1.0).
    pub fn score(self) -> f64 {
        match self {
            Self::Stop => 0.00,
            Self::SibilantFricative | Self::NonSibilantFricative => 0.50,
            Self::LateralFricative => 0.55,
            Self::Nasal => 0.70,
            Self::TapFlap | Self::LateralTapFlap => 0.85,
            Self::Trill => 0.90,
            Self::LateralApproximant | Self::Approximant => 1.00,
        }
    }

    /// True for the manners that route airflow around the sides of the
    /// tongue: the three `Lateral*` variants.
    pub fn is_lateral(self) -> bool {
        [
            Self::LateralFricative,
            Self::LateralApproximant,
            Self::LateralTapFlap,
        ]
        .contains(&self)
    }
}
|
|
96
|
+
|
|
97
|
+
/// Features of a consonant in the inventory.
|
|
98
|
+
#[derive(Debug, Clone, Copy, PartialEq)]
|
|
99
|
+
pub struct Consonant {
|
|
100
|
+
/// Place of articulation.
|
|
101
|
+
pub position: Position,
|
|
102
|
+
/// Manner of articulation.
|
|
103
|
+
pub manner: Manner,
|
|
104
|
+
/// True if the vocal folds vibrate.
|
|
105
|
+
pub voiced: bool,
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/// Added to the distance when exactly one of the two consonants is voiced.
pub const VOICING_PENALTY: f64 = 0.15;
/// Weight on the manner term; since manner scores lie in [0, 1], this is
/// also the largest amount manner can contribute.
pub const MANNER_SCALE: f64 = 0.45;
/// Weight on the place term, i.e. the largest amount place can contribute.
pub const PLACE_SCALE: f64 = 0.30;
/// Divisor for the Euclidean place distance. Both coordinates lie in
/// [0, 1], so the largest possible distance is sqrt(2).
pub const PLACE_NORM: f64 = std::f64::consts::SQRT_2;
/// Added to the distance when exactly one of the two consonants routes
/// airflow laterally (e.g. /l/ vs /ɹ/).
pub const LATERAL_PENALTY: f64 = 0.10;
|
|
119
|
+
|
|
120
|
+
/// All IPA consonant symbols known to the metric, listed in canonical
/// (chart-traversal) order and grouped by manner of articulation.
pub const INVENTORY: &[&str] = &[
    // Nasals
    "m̥", "m", "ɱ", "n̼", "n̥", "n", "ɳ̊", "ɳ", "ɲ̊", "ɲ", "ŋ̊", "ŋ", "ɴ",
    // Stops
    "p", "b", "p̪", "b̪", "t̼", "d̼", "t", "d", "ʈ", "ɖ", "c", "ɟ", "k", "g", "q", "ɢ", "ʡ", "ʔ",
    // Sibilant fricatives
    "s", "z", "ʃ", "ʒ", "ʂ", "ʐ", "ɕ", "ʑ",
    // Non-sibilant fricatives
    "ɸ", "β", "f", "v", "θ̼", "ð̼", "θ", "ð", "θ̠", "ð̠", "ɹ̠̊˔", "ɹ̠˔", "ɻ˔", "ç", "ʝ", "x", "ɣ", "χ",
    "ʁ", "ħ", "ʕ", "h", "ɦ",
    // Approximants
    "w", "ʋ̥", "ʋ", "ɹ̥", "ɹ", "ɻ̊", "ɻ", "j̊", "j", "ɰ̊", "ɰ", "ʔ̞",
    // Taps/flaps
    "ⱱ̟", "ⱱ", "ɾ̼", "ɾ̥", "ɾ", "ɽ̊", "ɽ", "ɢ̆", "ʡ̆",
    // Trills
    "ʙ̥", "ʙ", "r̥", "r", "ʀ̥", "ʀ", "ʜ", "ʢ",
    // Lateral fricatives
    "ɬ", "ɮ", "ɭ̊˔", "ɭ˔", "ʎ̝̊", "ʎ̝", "ʟ̝̊", "ʟ̝",
    // Lateral approximants
    "l̥", "l", "ɭ̊", "ɭ", "ʎ̥", "ʎ", "ʟ̥", "ʟ", "ʟ̠",
    // Lateral taps/flaps
    "ɺ", "ɭ̆", "ʎ̆", "ʟ̆",
];
|
|
144
|
+
|
|
145
|
+
/// Look up the feature data for an IPA consonant symbol.
|
|
146
|
+
///
|
|
147
|
+
/// The table is the full IPA pulmonic-consonant chart from
|
|
148
|
+
/// <https://en.wikipedia.org/wiki/International_Phonetic_Alphabet#Letters>,
|
|
149
|
+
/// transcribed once and embedded as a `match`.
|
|
150
|
+
#[allow(clippy::too_many_lines)]
|
|
151
|
+
pub fn lookup(symbol: &str) -> Option<Consonant> {
|
|
152
|
+
use Manner::*;
|
|
153
|
+
use Position::*;
|
|
154
|
+
let c = |position, manner, voiced| Consonant { position, manner, voiced };
|
|
155
|
+
Some(match symbol {
|
|
156
|
+
// Nasals
|
|
157
|
+
"m̥" => c(BiLabial, Nasal, false),
|
|
158
|
+
"m" => c(BiLabial, Nasal, true),
|
|
159
|
+
"ɱ" => c(LabioDental, Nasal, true),
|
|
160
|
+
"n̼" => c(LinguoLabial, Nasal, true),
|
|
161
|
+
"n̥" => c(Alveolar, Nasal, false),
|
|
162
|
+
"n" => c(Alveolar, Nasal, true),
|
|
163
|
+
"ɳ̊" => c(RetroFlex, Nasal, false),
|
|
164
|
+
"ɳ" => c(RetroFlex, Nasal, true),
|
|
165
|
+
"ɲ̊" => c(Palatal, Nasal, false),
|
|
166
|
+
"ɲ" => c(Palatal, Nasal, true),
|
|
167
|
+
"ŋ̊" => c(Velar, Nasal, false),
|
|
168
|
+
"ŋ" => c(Velar, Nasal, true),
|
|
169
|
+
"ɴ" => c(Uvular, Nasal, true),
|
|
170
|
+
// Stops
|
|
171
|
+
"p" => c(BiLabial, Stop, false),
|
|
172
|
+
"b" => c(BiLabial, Stop, true),
|
|
173
|
+
"p̪" => c(LabioDental, Stop, false),
|
|
174
|
+
"b̪" => c(LabioDental, Stop, true),
|
|
175
|
+
"t̼" => c(LinguoLabial, Stop, false),
|
|
176
|
+
"d̼" => c(LinguoLabial, Stop, true),
|
|
177
|
+
"t" => c(Alveolar, Stop, false),
|
|
178
|
+
"d" => c(Alveolar, Stop, true),
|
|
179
|
+
"ʈ" => c(RetroFlex, Stop, false),
|
|
180
|
+
"ɖ" => c(RetroFlex, Stop, true),
|
|
181
|
+
"c" => c(Palatal, Stop, false),
|
|
182
|
+
"ɟ" => c(Palatal, Stop, true),
|
|
183
|
+
"k" => c(Velar, Stop, false),
|
|
184
|
+
"g" => c(Velar, Stop, true),
|
|
185
|
+
"q" => c(Uvular, Stop, false),
|
|
186
|
+
"ɢ" => c(Uvular, Stop, true),
|
|
187
|
+
"ʡ" => c(Pharyngeal, Stop, false),
|
|
188
|
+
"ʔ" => c(Glottal, Stop, false),
|
|
189
|
+
// Sibilant fricatives
|
|
190
|
+
"s" => c(Alveolar, SibilantFricative, false),
|
|
191
|
+
"z" => c(Alveolar, SibilantFricative, true),
|
|
192
|
+
"ʃ" => c(PostAlveolar, SibilantFricative, false),
|
|
193
|
+
"ʒ" => c(PostAlveolar, SibilantFricative, true),
|
|
194
|
+
"ʂ" => c(RetroFlex, SibilantFricative, false),
|
|
195
|
+
"ʐ" => c(RetroFlex, SibilantFricative, true),
|
|
196
|
+
"ɕ" => c(Palatal, SibilantFricative, false),
|
|
197
|
+
"ʑ" => c(Palatal, SibilantFricative, true),
|
|
198
|
+
// Non-sibilant fricatives
|
|
199
|
+
"ɸ" => c(BiLabial, NonSibilantFricative, false),
|
|
200
|
+
"β" => c(BiLabial, NonSibilantFricative, true),
|
|
201
|
+
"f" => c(LabioDental, NonSibilantFricative, false),
|
|
202
|
+
"v" => c(LabioDental, NonSibilantFricative, true),
|
|
203
|
+
"θ̼" => c(LinguoLabial, NonSibilantFricative, false),
|
|
204
|
+
"ð̼" => c(LinguoLabial, NonSibilantFricative, true),
|
|
205
|
+
"θ" => c(Dental, NonSibilantFricative, false),
|
|
206
|
+
"ð" => c(Dental, NonSibilantFricative, true),
|
|
207
|
+
"θ̠" => c(Alveolar, NonSibilantFricative, false),
|
|
208
|
+
"ð̠" => c(Alveolar, NonSibilantFricative, true),
|
|
209
|
+
"ɹ̠̊˔" => c(PostAlveolar, NonSibilantFricative, false),
|
|
210
|
+
"ɹ̠˔" => c(PostAlveolar, NonSibilantFricative, true),
|
|
211
|
+
"ɻ˔" => c(RetroFlex, NonSibilantFricative, true),
|
|
212
|
+
"ç" => c(Palatal, NonSibilantFricative, false),
|
|
213
|
+
"ʝ" => c(Palatal, NonSibilantFricative, true),
|
|
214
|
+
"x" => c(Velar, NonSibilantFricative, false),
|
|
215
|
+
"ɣ" => c(Velar, NonSibilantFricative, true),
|
|
216
|
+
"χ" => c(Uvular, NonSibilantFricative, false),
|
|
217
|
+
"ʁ" => c(Uvular, NonSibilantFricative, true),
|
|
218
|
+
"ħ" => c(Pharyngeal, NonSibilantFricative, false),
|
|
219
|
+
"ʕ" => c(Pharyngeal, NonSibilantFricative, true),
|
|
220
|
+
"h" => c(Glottal, NonSibilantFricative, false),
|
|
221
|
+
"ɦ" => c(Glottal, NonSibilantFricative, true),
|
|
222
|
+
// Approximants
|
|
223
|
+
"w" => c(LabioVelar, Approximant, true),
|
|
224
|
+
"ʋ̥" => c(LabioDental, Approximant, false),
|
|
225
|
+
"ʋ" => c(LabioDental, Approximant, true),
|
|
226
|
+
"ɹ̥" => c(Alveolar, Approximant, false),
|
|
227
|
+
"ɹ" => c(Alveolar, Approximant, true),
|
|
228
|
+
"ɻ̊" => c(RetroFlex, Approximant, false),
|
|
229
|
+
"ɻ" => c(RetroFlex, Approximant, true),
|
|
230
|
+
"j̊" => c(Palatal, Approximant, false),
|
|
231
|
+
"j" => c(Palatal, Approximant, true),
|
|
232
|
+
"ɰ̊" => c(Velar, Approximant, false),
|
|
233
|
+
"ɰ" => c(Velar, Approximant, true),
|
|
234
|
+
"ʔ̞" => c(Glottal, Approximant, true),
|
|
235
|
+
// Taps/flaps
|
|
236
|
+
"ⱱ̟" => c(BiLabial, TapFlap, true),
|
|
237
|
+
"ⱱ" => c(LabioDental, TapFlap, true),
|
|
238
|
+
"ɾ̼" => c(LinguoLabial, TapFlap, true),
|
|
239
|
+
"ɾ̥" => c(Alveolar, TapFlap, false),
|
|
240
|
+
"ɾ" => c(Alveolar, TapFlap, true),
|
|
241
|
+
"ɽ̊" => c(RetroFlex, TapFlap, false),
|
|
242
|
+
"ɽ" => c(RetroFlex, TapFlap, true),
|
|
243
|
+
"ɢ̆" => c(Uvular, TapFlap, true),
|
|
244
|
+
"ʡ̆" => c(Pharyngeal, TapFlap, true),
|
|
245
|
+
// Trills
|
|
246
|
+
"ʙ̥" => c(BiLabial, Trill, false),
|
|
247
|
+
"ʙ" => c(BiLabial, Trill, true),
|
|
248
|
+
"r̥" => c(Alveolar, Trill, false),
|
|
249
|
+
"r" => c(Alveolar, Trill, true),
|
|
250
|
+
"ʀ̥" => c(Uvular, Trill, false),
|
|
251
|
+
"ʀ" => c(Uvular, Trill, true),
|
|
252
|
+
"ʜ" => c(Pharyngeal, Trill, false),
|
|
253
|
+
"ʢ" => c(Pharyngeal, Trill, true),
|
|
254
|
+
// Lateral fricatives
|
|
255
|
+
"ɬ" => c(Alveolar, LateralFricative, false),
|
|
256
|
+
"ɮ" => c(Alveolar, LateralFricative, true),
|
|
257
|
+
"ɭ̊˔" => c(RetroFlex, LateralFricative, false),
|
|
258
|
+
"ɭ˔" => c(RetroFlex, LateralFricative, true),
|
|
259
|
+
"ʎ̝̊" => c(Palatal, LateralFricative, false),
|
|
260
|
+
"ʎ̝" => c(Palatal, LateralFricative, true),
|
|
261
|
+
"ʟ̝̊" => c(Velar, LateralFricative, false),
|
|
262
|
+
"ʟ̝" => c(Velar, LateralFricative, true),
|
|
263
|
+
// Lateral approximants
|
|
264
|
+
"l̥" => c(Alveolar, LateralApproximant, false),
|
|
265
|
+
"l" => c(Alveolar, LateralApproximant, true),
|
|
266
|
+
"ɭ̊" => c(RetroFlex, LateralApproximant, false),
|
|
267
|
+
"ɭ" => c(RetroFlex, LateralApproximant, true),
|
|
268
|
+
"ʎ̥" => c(Palatal, LateralApproximant, false),
|
|
269
|
+
"ʎ" => c(Palatal, LateralApproximant, true),
|
|
270
|
+
"ʟ̥" => c(Velar, LateralApproximant, false),
|
|
271
|
+
"ʟ" => c(Velar, LateralApproximant, true),
|
|
272
|
+
"ʟ̠" => c(Uvular, LateralApproximant, true),
|
|
273
|
+
// Lateral taps/flaps
|
|
274
|
+
"ɺ" => c(Alveolar, LateralTapFlap, true),
|
|
275
|
+
"ɭ̆" => c(RetroFlex, LateralTapFlap, true),
|
|
276
|
+
"ʎ̆" => c(Palatal, LateralTapFlap, true),
|
|
277
|
+
"ʟ̆" => c(Velar, LateralTapFlap, true),
|
|
278
|
+
_ => return None,
|
|
279
|
+
})
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/// Distance between two consonants, scaled into [0, 1].
|
|
283
|
+
///
|
|
284
|
+
/// Returns `None` if either symbol is unknown.
|
|
285
|
+
pub fn distance(p1: &str, p2: &str) -> Option<f64> {
|
|
286
|
+
if p1 == p2 {
|
|
287
|
+
return Some(0.0);
|
|
288
|
+
}
|
|
289
|
+
let c1 = lookup(p1)?;
|
|
290
|
+
let c2 = lookup(p2)?;
|
|
291
|
+
|
|
292
|
+
let mut penalty = 0.0;
|
|
293
|
+
if c1.voiced != c2.voiced {
|
|
294
|
+
penalty += VOICING_PENALTY;
|
|
295
|
+
}
|
|
296
|
+
penalty += MANNER_SCALE * (c1.manner.score() - c2.manner.score()).abs();
|
|
297
|
+
if c1.manner.is_lateral() != c2.manner.is_lateral() {
|
|
298
|
+
penalty += LATERAL_PENALTY;
|
|
299
|
+
}
|
|
300
|
+
let (x1, y1) = c1.position.coords();
|
|
301
|
+
let (x2, y2) = c2.position.coords();
|
|
302
|
+
penalty += PLACE_SCALE * ((x1 - x2).powi(2) + (y1 - y2).powi(2)).sqrt() / PLACE_NORM;
|
|
303
|
+
|
|
304
|
+
Some(penalty.min(1.0))
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for comparisons against reference values.
    const EPS: f64 = 1e-12;

    /// Reference values produced by the Ruby implementation, as
    /// `(first, second, expected)`.
    const RUBY_REFERENCE: &[(&str, &str, f64)] = &[
        ("p", "b", 0.15),
        ("p", "t", 0.151_863_425_484_874_36),
        ("p", "k", 0.247_613_610_288_287_66),
        ("m", "n", 0.151_863_425_484_874_36),
        ("s", "z", 0.15),
        ("s", "ʃ", 0.023_717_082_451_262_854),
        ("s", "t", 0.225),
        ("n", "l", 0.235_000_000_000_000_04),
        ("ŋ", "g", 0.315),
        ("l", "ɹ", 0.1),
        ("h", "k", 0.272_434_164_902_525_7),
        ("θ", "t", 0.255),
        ("ð", "d", 0.255),
        ("w", "j", 0.185_236_335_528_427),
        ("ɮ", "z", 0.122_500_000_000_000_03),
    ];

    #[test]
    fn matches_ruby_consonant_distances() {
        for &(a, b, expected) in RUBY_REFERENCE {
            let got = distance(a, b).expect("inventory pair");
            let drift = (got - expected).abs();
            assert!(
                drift < EPS,
                "distance({a:?}, {b:?}) = {got}, expected {expected}",
            );
        }
    }

    #[test]
    fn identity_is_zero() {
        // A representative subset; the full inventory is large.
        let sample = ["p", "b", "t", "k", "m", "n", "s", "ʃ", "l", "ɹ", "ŋ", "ɮ"];
        for s in sample {
            assert_eq!(distance(s, s), Some(0.0));
        }
    }

    #[test]
    fn l_and_r_are_not_tied_at_zero() {
        // /l/ and /ɹ/ share place, voicing and manner rank (1.0), so only
        // LATERAL_PENALTY keeps their distance above zero.
        let d = distance("l", "ɹ").unwrap();
        assert!(d > 0.0, "/l/-/ɹ/ should be > 0, got {d}");
    }

    #[test]
    fn unknown_symbol_returns_none() {
        let result = distance("Z", "p");
        assert!(result.is_none());
    }
}
|