RubyGems - phonetics - Versions diffs - 3.0.9 → 4.0.0 - Mend

phonetics 3.0.9 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +4 -4
data/.gitignore +17 -2
data/CHANGELOG +4 -0
data/Cargo.toml +27 -0
data/Rakefile +58 -26
data/VERSION +1 -1
data/bin/phonetics +89 -0
data/ext/phonetics_ruby/Cargo.toml +36 -0
data/ext/phonetics_ruby/build.rs +24 -0
data/ext/phonetics_ruby/extconf.rb +17 -0
data/ext/phonetics_ruby/src/lib.rs +56 -0
data/ext/phonetics_ruby/vendor/phonetics/Cargo.toml +30 -0
data/ext/phonetics_ruby/vendor/phonetics/README.md +29 -0
data/ext/phonetics_ruby/vendor/phonetics/src/compounds.rs +40 -0
data/ext/phonetics_ruby/vendor/phonetics/src/confusion.rs +325 -0
data/ext/phonetics_ruby/vendor/phonetics/src/consonants.rs +363 -0
data/ext/phonetics_ruby/vendor/phonetics/src/cross_class.rs +56 -0
data/ext/phonetics_ruby/vendor/phonetics/src/diacritics.rs +113 -0
data/ext/phonetics_ruby/vendor/phonetics/src/distance.rs +183 -0
data/ext/phonetics_ruby/vendor/phonetics/src/levenshtein.rs +146 -0
data/ext/phonetics_ruby/vendor/phonetics/src/lib.rs +44 -0
data/ext/phonetics_ruby/vendor/phonetics/src/symbols.rs +21 -0
data/ext/phonetics_ruby/vendor/phonetics/src/tokenizer.rs +171 -0
data/ext/phonetics_ruby/vendor/phonetics/src/vowels.rs +197 -0
data/lib/phonetics.rb +77 -2
data/phonetics.gemspec +33 -9
metadata +46 -34
data/.github/workflows/gempush.yml +0 -28
data/.github/workflows/test.yml +0 -20
data/Makefile +0 -6
data/ext/c_levenshtein/extconf.rb +0 -10
data/ext/c_levenshtein/levenshtein.c +0 -223
data/ext/c_levenshtein/next_phoneme_length.c +0 -1365
data/ext/c_levenshtein/next_phoneme_length.h +0 -1
data/ext/c_levenshtein/phonemes.c +0 -53
data/ext/c_levenshtein/phonemes.h +0 -3
data/ext/c_levenshtein/phonetic_cost.c +0 -88593
data/ext/c_levenshtein/phonetic_cost.h +0 -1
data/lib/phonetics/code_generator.rb +0 -228
data/lib/phonetics/distances.rb +0 -245
data/lib/phonetics/levenshtein.rb +0 -27
data/lib/phonetics/ruby_levenshtein.rb +0 -162

data/ext/phonetics_ruby/vendor/phonetics/src/tokenizer.rs ADDED Viewed

@@ -0,0 +1,171 @@
+//! IPA phoneme tokenizer.
+//!
+//! Walks an input string and emits a sequence of phoneme tokens. The
+//! recognition is longest-prefix: multi-character atoms like /tʃ/,
+//! /aɪ/, and /ɝ/ win over their single-character constituents.
+//!
+//! Diacritics absorb into the segment they modify — trailing
+//! modifiers attach to the preceding base phoneme, stress marks
+//! attach to the following one. Boundary characters (space, tab, `_`,
+//! `|`) are skipped by default; in boundary mode (used by the
+//! Confusion metric) each one emits the `#` boundary token.
+use std::collections::HashSet;
+use std::sync::LazyLock;
+use crate::{compounds, consonants, diacritics::Diacritic, symbols, vowels};
+/// Set of every recognised phoneme symbol. Includes the boundary token
+/// so longest-prefix matching can pick it up on raw `#` input.
+pub static PHONEME_SET: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
+    let mut s: HashSet<&'static str> = HashSet::new();
+    for &p in vowels::INVENTORY {
+        s.insert(p);
+    }
+    for &p in consonants::INVENTORY {
+        s.insert(p);
+    }
+    for &p in compounds::INVENTORY {
+        s.insert(p);
+    }
+    s.insert(symbols::BOUNDARY_TOKEN);
+    s
+});
+/// Largest phoneme-symbol size in characters (not bytes). Used as the
+/// upper bound for longest-prefix matching.
+pub static MAX_PHONEME_CHARS: LazyLock<usize> = LazyLock::new(|| {
+    PHONEME_SET
+        .iter()
+        .map(|s| s.chars().count())
+        .max()
+        .unwrap_or(1)
+});
+/// True if `s` is a recognised phoneme symbol.
+pub fn is_phoneme(s: &str) -> bool {
+    PHONEME_SET.contains(s)
+}
+/// Characters that represent a word boundary in raw IPA input.
+const BOUNDARY_CHARS: &[char] = &[' ', '\t', '_', '|'];
+/// Tokenise an IPA string into a sequence of phoneme tokens.
+/// Unrecognised characters are silently skipped.
+/// When `boundaries` is true, each boundary character (`BOUNDARY_CHARS`)
+/// in the input emits the `#` token; otherwise they're skipped.
+pub fn tokens(input: &str, boundaries: bool) -> Vec<String> {
+    let chars: Vec<char> = input.chars().collect();
+    let max_phoneme_size = *MAX_PHONEME_CHARS;
+    let mut out: Vec<String> = Vec::new();
+    let mut pending_prefix = String::new();
+    let mut idx = 0;
+    while idx < chars.len() {
+        let ch = chars[idx];
+        if BOUNDARY_CHARS.contains(&ch) {
+            if boundaries {
+                out.push(symbols::BOUNDARY_TOKEN.to_string());
+            }
+            idx += 1;
+            continue;
+        }
+        // Stress marks bind forward; carry them onto the next emitted token.
+        if let Some(d) = Diacritic::from_char(ch) {
+            if d.is_leading() {
+                pending_prefix.push(ch);
+                idx += 1;
+                continue;
+            }
+        }
+        // Try longest-prefix match against the recognized inventory.
+        let mut matched: Option<String> = None;
+        let max = max_phoneme_size.min(chars.len() - idx);
+        for size in (1..=max).rev() {
+            let candidate: String = chars[idx..idx + size].iter().collect();
+            if is_phoneme(&candidate) {
+                matched = Some(candidate);
+                idx += size;
+                break;
+            }
+        }
+        if let Some(base) = matched {
+            let mut token = std::mem::take(&mut pending_prefix);
+            token.push_str(&base);
+            // Absorb any trailing diacritics that modify this phoneme.
+            while idx < chars.len() {
+                let next = chars[idx];
+                match Diacritic::from_char(next) {
+                    Some(d) if !d.is_leading() => {
+                        token.push(next);
+                        idx += 1;
+                    }
+                    _ => break,
+                }
+            }
+            out.push(token);
+        } else {
+            // No recognised phoneme starts here; skip one character.
+            idx += 1;
+        }
+    }
+    out
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+    fn t(s: &str) -> Vec<String> {
+        tokens(s, false)
+    }
+    fn tb(s: &str) -> Vec<String> {
+        tokens(s, true)
+    }
+    #[test]
+    fn matches_ruby_reference_tokenisations() {
+        // Reference outputs produced by the Ruby implementation.
+        let cases: &[(&str, &[&str], &[&str])] = &[
+            ("kæt",        &["k","æ","t"],                                       &["k","æ","t"]),
+            ("wətɛvɝ",     &["w","ə","t","ɛ","v","ɝ"],                           &["w","ə","t","ɛ","v","ɝ"]),
+            ("kuɹzlɑɪt",   &["k","u","ɹ","z","l","ɑɪ","t"],                       &["k","u","ɹ","z","l","ɑɪ","t"]),
+            ("dʒʌstɪs",    &["dʒ","ʌ","s","t","ɪ","s"],                            &["dʒ","ʌ","s","t","ɪ","s"]),
+            ("tʃɝtʃ",      &["tʃ","ɝ","tʃ"],                                       &["tʃ","ɝ","tʃ"]),
+            ("stupɪdgeɪm", &["s","t","u","p","ɪ","d","g","eɪ","m"],                 &["s","t","u","p","ɪ","d","g","eɪ","m"]),
+            ("wə t 9 ɛvɝ", &["w","ə","t","ɛ","v","ɝ"],                              &["w","ə","#","t","#","#","ɛ","v","ɝ"]),
+            ("pʰɪt",       &["pʰ","ɪ","t"],                                        &["pʰ","ɪ","t"]),
+            ("kʰæt̃",       &["kʰ","æ","t̃"],                                        &["kʰ","æ","t̃"]),
+            ("ˈstop",      &["ˈs","t","o","p"],                                    &["ˈs","t","o","p"]),
+            ("ˌɪntɝˈnæʃənl", &["ˌɪ","n","t","ɝ","ˈn","æ","ʃ","ə","n","l"],            &["ˌɪ","n","t","ɝ","ˈn","æ","ʃ","ə","n","l"]),
+            ("stuːpɪd",    &["s","t","uː","p","ɪ","d"],                            &["s","t","uː","p","ɪ","d"]),
+            ("aɪlʌvju",    &["aɪ","l","ʌ","v","j","u"],                            &["aɪ","l","ʌ","v","j","u"]),
+        ];
+        for (input, bare, with_bounds) in cases {
+            let got_bare = t(input);
+            let got_bnds = tb(input);
+            let want_bare: Vec<String> = bare.iter().map(|s| s.to_string()).collect();
+            let want_bnds: Vec<String> = with_bounds.iter().map(|s| s.to_string()).collect();
+            assert_eq!(got_bare, want_bare, "bare tokenisation diverged for {input:?}");
+            assert_eq!(got_bnds, want_bnds, "boundary tokenisation diverged for {input:?}");
+        }
+    }
+    #[test]
+    fn skips_unknown_characters() {
+        assert_eq!(t("k9æt"), vec!["k".to_string(), "æ".to_string(), "t".to_string()]);
+    }
+    #[test]
+    fn empty_input_yields_empty_output() {
+        assert!(t("").is_empty());
+        assert!(tb("").is_empty());
+    }
+}

data/ext/phonetics_ruby/vendor/phonetics/src/vowels.rs ADDED Viewed

@@ -0,0 +1,197 @@
+//! Vowel distance in Bark-Euclidean space.
+//!
+//! F1 and F2 are stored in Hz but compared in Bark via the Zwicker &
+//! Terhardt (1980) approximation, because pitch perception is roughly
+//! logarithmic: a 200 Hz shift at F1=300 is enormous while the same
+//! shift at F2=2200 is barely audible. Roundedness and rhoticity are
+//! additive penalties on top of the formant distance.
+use std::sync::LazyLock;
+/// Acoustic properties of one vowel in the inventory.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct Vowel {
+    /// First formant frequency in Hz.
+    pub f1: f64,
+    /// Second formant frequency in Hz.
+    pub f2: f64,
+    /// Lip rounding.
+    pub rounded: bool,
+    /// Rhoticity (for /ɝ/).
+    pub rhotic: bool,
+}
+/// Tunable: vowels share a perceptually narrower space than consonants,
+/// so the formant contribution is capped well below 1.0.
+pub const VOWEL_SCALE: f64 = 0.60;
+/// Additive penalty when one vowel is rounded and the other isn't.
+pub const ROUNDING_PENALTY: f64 = 0.05;
+/// Additive penalty for rhoticity mismatch.
+pub const RHOTICITY_PENALTY: f64 = 0.20;
+/// Hz → Bark. Zwicker & Terhardt (1980) approximation.
+pub fn bark(hz: f64) -> f64 {
+    if hz <= 0.0 {
+        return 0.0;
+    }
+    13.0 * (0.000_76 * hz).atan() + 3.5 * (hz / 7500.0).powi(2).atan()
+}
+/// IPA symbols in this inventory, in canonical order.
+pub const INVENTORY: &[&str] = &[
+    "i", "y", "ɪ", "e", "ø", "ɛ", "œ", "a", "ɶ", "ɑ", "ɒ",
+    "ʌ", "ə", "ɝ", "ɔ", "ɤ", "o", "ɯ", "æ", "u", "ʊ",
+];
+/// Look up the formant data for an IPA vowel symbol.
+///
+/// Values from the cardinal-vowel measurements on Wikipedia (Daniel
+/// Jones tradition), with the typo on /y/'s rounding flag corrected
+/// from the original Ruby table and /ə/ no longer duplicating /ʌ/.
+pub fn lookup(symbol: &str) -> Option<Vowel> {
+    let v = |f1, f2, rounded, rhotic| Vowel { f1, f2, rounded, rhotic };
+    Some(match symbol {
+        "i" => v(240.0, 2400.0, false, false),
+        "y" => v(235.0, 2100.0, true,  false),
+        "ɪ" => v(300.0, 2100.0, false, false),
+        "e" => v(390.0, 2300.0, false, false),
+        "ø" => v(370.0, 1900.0, true,  false),
+        "ɛ" => v(610.0, 1900.0, false, false),
+        "œ" => v(585.0, 1710.0, true,  false),
+        "a" => v(850.0, 1610.0, false, false),
+        "ɶ" => v(820.0, 1530.0, true,  false),
+        "ɑ" => v(750.0, 940.0,  false, false),
+        "ɒ" => v(700.0, 760.0,  true,  false),
+        "ʌ" => v(600.0, 1170.0, false, false),
+        "ə" => v(500.0, 1500.0, false, false),
+        "ɝ" => v(500.0, 1350.0, false, true),
+        "ɔ" => v(500.0, 700.0,  true,  false),
+        "ɤ" => v(460.0, 1310.0, false, false),
+        "o" => v(360.0, 640.0,  true,  false),
+        "ɯ" => v(300.0, 1390.0, false, false),
+        "æ" => v(690.0, 1660.0, false, false),
+        "u" => v(250.0, 595.0,  true,  false),
+        "ʊ" => v(380.0, 950.0,  true,  false),
+        _ => return None,
+    })
+}
+/// Diagonal of the inventory's bounding box in Bark space — an upper
+/// bound on any pairwise distance. Memoised; computed on first access.
+static BARK_SPAN: LazyLock<f64> = LazyLock::new(|| {
+    let coords: Vec<(f64, f64)> = INVENTORY
+        .iter()
+        .map(|s| {
+            let v = lookup(s).expect("INVENTORY entries must be in lookup()");
+            (bark(v.f1), bark(v.f2))
+        })
+        .collect();
+    let f1_min = coords.iter().map(|c| c.0).fold(f64::INFINITY, f64::min);
+    let f1_max = coords.iter().map(|c| c.0).fold(f64::NEG_INFINITY, f64::max);
+    let f2_min = coords.iter().map(|c| c.1).fold(f64::INFINITY, f64::min);
+    let f2_max = coords.iter().map(|c| c.1).fold(f64::NEG_INFINITY, f64::max);
+    ((f1_max - f1_min).powi(2) + (f2_max - f2_min).powi(2)).sqrt()
+});
+/// Returns the cached Bark-span normaliser.
+pub fn bark_span() -> f64 {
+    *BARK_SPAN
+}
+/// Distance between two vowels, scaled into [0, 1]. Identical inputs
+/// return `Some(0.0)` without a lookup, even for unknown symbols;
+/// otherwise returns `None` if either symbol is not in the inventory.
+pub fn distance(p1: &str, p2: &str) -> Option<f64> {
+    if p1 == p2 {
+        return Some(0.0);
+    }
+    let v1 = lookup(p1)?;
+    let v2 = lookup(p2)?;
+    let (a1, b1) = (bark(v1.f1), bark(v1.f2));
+    let (a2, b2) = (bark(v2.f1), bark(v2.f2));
+    let formant_dist = ((a1 - a2).powi(2) + (b1 - b2).powi(2)).sqrt() / bark_span();
+    let mut penalty = formant_dist * VOWEL_SCALE;
+    if v1.rounded != v2.rounded {
+        penalty += ROUNDING_PENALTY;
+    }
+    if v1.rhotic != v2.rhotic {
+        penalty += RHOTICITY_PENALTY;
+    }
+    Some(penalty.min(1.0))
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+    /// Tolerance: the Ruby reference uses f64 throughout; we should match
+    /// to at least 12 decimals.
+    const EPS: f64 = 1e-12;
+    /// Reference values produced by the Ruby implementation, formerly
+    /// lib/phonetics/distances.rb (removed from the gem in 4.0.0). Bumping a
+    /// constant here means regenerating them and re-confirming parity.
+    #[test]
+    fn matches_ruby_vowel_distances() {
+        let cases: &[(&str, &str, f64)] = &[
+            ("i", "y", 0.099_760_210_846_103_59),
+            ("i", "ɪ", 0.060_056_384_465_816_57),
+            ("i", "u", 0.565_279_341_709_588),
+            ("a", "ɑ", 0.214_576_497_544_325_4),
+            ("æ", "ɛ", 0.064_974_568_637_334_88),
+            ("ə", "ɝ", 0.241_916_659_928_285_43),
+            ("o", "ə", 0.371_374_251_614_846_3),
+            ("u", "y", 0.465_646_551_803_915_9),
+            ("ʊ", "u", 0.172_060_682_790_273_34),
+        ];
+        for (a, b, expected) in cases {
+            let got = distance(a, b).expect("inventory pair");
+            assert!(
+                (got - expected).abs() < EPS,
+                "distance({a:?}, {b:?}) = {got}, expected {expected}",
+            );
+        }
+    }
+    #[test]
+    fn bark_span_matches_ruby() {
+        assert!((bark_span() - 10.148_711_232_912_262).abs() < EPS);
+    }
+    #[test]
+    fn bark_for_known_frequencies() {
+        // /i/'s F1 = 240 Hz → 2.349… Bark
+        assert!((bark(240.0) - 2.349_000_345_620_559).abs() < EPS);
+        // /a/'s F1 = 850 Hz → 7.501… Bark
+        assert!((bark(850.0) - 7.501_208_750_766_951).abs() < EPS);
+        // Edge: 0 Hz returns 0.
+        assert_eq!(bark(0.0), 0.0);
+    }
+    #[test]
+    fn identity_is_zero() {
+        for s in INVENTORY {
+            assert_eq!(distance(s, s), Some(0.0));
+        }
+    }
+    #[test]
+    fn symmetric() {
+        for a in INVENTORY {
+            for b in INVENTORY {
+                let d_ab = distance(a, b).unwrap();
+                let d_ba = distance(b, a).unwrap();
+                assert!((d_ab - d_ba).abs() < EPS, "asymmetric: {a}/{b}");
+            }
+        }
+    }
+    #[test]
+    fn unknown_symbol_returns_none() {
+        assert!(distance("Z", "i").is_none());
+        assert!(distance("i", "Z").is_none());
+    }
+}

data/lib/phonetics.rb CHANGED Viewed

@@ -1,4 +1,79 @@
 # frozen_string_literal: true
-require 'phonetics/distances'
-require 'phonetics/transcriptions'
+# Phonetics — IPA-based phonetic distance.
+#
+# The entire algorithmic core is written in Rust (see
+# <repo>/rust/phonetics) and loaded as a native extension via Magnus.
+# This file layers ergonomic Ruby idioms on top of the bare module
+# functions that the extension exports.
+#
+# Two-tier distance API:
+#
+#   Phonetics.distance(p1, p2)              acoustic per-phoneme, 0..1
+#   Phonetics.levenshtein(s1, s2)           strict edit distance
+#   Phonetics.confusion(s1, s2)             listener-confusion distance
+#   Phonetics.similarity(s1, s2)            normalised 0..1
+#   Phonetics.sub_cost(p1, p2)              perceptual per-phoneme
+#   Phonetics.tokenize(ipa, boundaries:)    phoneme stream
+require 'delegate'
+require_relative 'phonetics/phonetics_ruby'
+require_relative 'phonetics/transcriptions'
+module Phonetics
+  # The native binding exposes the tokenizer as `_tokenize(input,
+  # boundaries)`. Magnus's `function!` macro doesn't bridge Ruby
+  # keyword arguments through to Rust, so we wrap it in a Ruby method
+  # that does accept the kwarg.
+  def self.tokenize(input, boundaries: false)
+    _tokenize(input, boundaries)
+  end
+  # ------------------------------------------------------------------
+  # Phonetics::String — each_phoneme returns an Enumerator over an IPA string.
+  # ------------------------------------------------------------------
+  class String < SimpleDelegator
+    def each_phoneme(boundaries: false)
+      Phonetics.tokenize(to_s, boundaries: boundaries).each
+    end
+  end
+  # ------------------------------------------------------------------
+  # Backwards-compatible namespaced API.
+  #
+  # The previous Ruby+C implementation exposed these under sub-modules.
+  # Keep them as thin delegators so existing callers don't break; the
+  # legacy `verbose` arguments are still accepted but are now ignored.
+  # ------------------------------------------------------------------
+  module Levenshtein
+    INDEL_COST     = 1.0
+    TRANSPOSE_COST = 0.8
+    def self.distance(s1, s2, _verbose = false)
+      return if s1.nil? || s2.nil?
+      Phonetics.levenshtein(s1, s2)
+    end
+  end
+  module Confusion
+    GAP_OPEN             = 0.60
+    GAP_EXTEND           = 0.25
+    WEAK_INDEL_COST      = 0.15
+    BOUNDARY_INDEL_COST  = 0.02
+    def self.distance(s1, s2, verbose: false)
+      _ = verbose
+      Phonetics.confusion(s1, s2)
+    end
+    def self.similarity(s1, s2)
+      Phonetics.similarity(s1, s2)
+    end
+    def self.sub_cost(a, b)
+      Phonetics.sub_cost(a, b)
+    end
+  end
+end

data/phonetics.gemspec CHANGED Viewed

@@ -1,29 +1,53 @@
 # frozen_string_literal: true
+require_relative 'lib/phonetics/version'
 Gem::Specification.new do |spec|
   spec.name          = 'phonetics'
-  spec.version       = File.read(File.join(File.dirname(__FILE__), './VERSION'))
+  spec.version       = Phonetics::VERSION
   spec.authors       = ['Jack Danger']
   spec.email         = ['github@jackcanty.com']
-  spec.summary       = 'tools for linguistic code using the International Phonetic Alphabet'
-  spec.description   = 'tools for linguistic code using the International Phonetic Alphabet'
+  spec.summary       = 'IPA-based phonetic distance: strict edit distance, listener-confusion distance, and per-phoneme acoustic and perceptual scoring.'
+  spec.description   = <<~DESC
+    Tools for working with the International Phonetic Alphabet. Two-tier
+    distance API — strict acoustic and listener-perception — backed by a
+    Rust core compiled in via Magnus. Calibrated against Mad Gab puzzle
+    data and English speech-perception literature.
+  DESC
   spec.homepage      = 'https://github.com/JackDanger/phonetics'
   spec.license       = 'MIT'
-  spec.required_ruby_version = '>= 2.5'
+  spec.required_ruby_version = '>= 3.0'
+  spec.required_rubygems_version = '>= 3.3.11'
+  spec.metadata['homepage_uri']    = spec.homepage
+  spec.metadata['source_code_uri'] = spec.homepage
-  spec.extensions = ['ext/c_levenshtein/extconf.rb']
+  spec.extensions = ['ext/phonetics_ruby/extconf.rb']
-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = Dir.chdir(File.expand_path(__dir__)) do
-    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+    tracked = `git ls-files -z`.split("\x0").reject do |f|
+      f.match(%r{\A(test|spec|features)/}) ||
+        f.match(%r{\Aext/phonetics_ruby/(target|Cargo.lock|Makefile)})
+    end
+    # The vendored Rust core isn't tracked in git (it's a build
+    # artifact populated by `rake vendor_rust`), but it IS shipped
+    # in the .gem tarball so end users don't need the source
+    # workspace to compile the extension.
+    vendor = Dir.glob('ext/phonetics_ruby/vendor/**/*', File::FNM_DOTMATCH).reject do |p|
+      File.directory?(p) ||
+        p.include?('/target/') ||
+        p.end_with?('Cargo.lock', '/.', '/..')
+    end
+    (tracked + vendor).uniq.sort
   end
   spec.require_paths = ['lib']
+  spec.add_dependency 'rb_sys', '~> 0.9'
   spec.add_development_dependency 'bundler'
-  spec.add_development_dependency 'pry-byebug'
   spec.add_development_dependency 'rake'
   spec.add_development_dependency 'rake-compiler'
   spec.add_development_dependency 'rspec'

metadata CHANGED Viewed

@@ -1,31 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: phonetics
 version: !ruby/object:Gem::Version
-  version: 3.0.9
+  version: 4.0.0
 platform: ruby
 authors:
 - Jack Danger
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-04-30 00:00:00.000000000 Z
+date: 2026-05-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: bundler
+  name: rb_sys
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
-  type: :development
+        version: '0.9'
+  type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '0.9'
 - !ruby/object:Gem::Dependency
-  name: pry-byebug
+  name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -94,24 +94,27 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-description: tools for linguistic code using the International Phonetic Alphabet
+description: |
+  Tools for working with the International Phonetic Alphabet. Two-tier
+  distance API — strict acoustic and listener-perception — backed by a
+  Rust core compiled in via Magnus. Calibrated against Mad Gab puzzle
+  data and English speech-perception literature.
 email:
 - github@jackcanty.com
 executables: []
 extensions:
-- ext/c_levenshtein/extconf.rb
+- ext/phonetics_ruby/extconf.rb
 extra_rdoc_files: []
 files:
-- ".github/workflows/gempush.yml"
-- ".github/workflows/test.yml"
 - ".gitignore"
 - ".rspec"
 - ".rubocop.yml"
+- CHANGELOG
 - CODE_OF_CONDUCT.md
+- Cargo.toml
 - Dockerfile
 - Gemfile
 - LICENSE.txt
-- Makefile
 - README.md
 - Rakefile
 - VERSION
@@ -120,28 +123,36 @@ files:
 - _site/vowel_chart_b_words.jpg
 - bin/console
 - bin/gempush-if-changed
-- ext/c_levenshtein/extconf.rb
-- ext/c_levenshtein/levenshtein.c
-- ext/c_levenshtein/next_phoneme_length.c
-- ext/c_levenshtein/next_phoneme_length.h
-- ext/c_levenshtein/phonemes.c
-- ext/c_levenshtein/phonemes.h
-- ext/c_levenshtein/phonetic_cost.c
-- ext/c_levenshtein/phonetic_cost.h
+- bin/phonetics
+- ext/phonetics_ruby/Cargo.toml
+- ext/phonetics_ruby/build.rs
+- ext/phonetics_ruby/extconf.rb
+- ext/phonetics_ruby/src/lib.rs
+- ext/phonetics_ruby/vendor/phonetics/Cargo.toml
+- ext/phonetics_ruby/vendor/phonetics/README.md
+- ext/phonetics_ruby/vendor/phonetics/src/compounds.rs
+- ext/phonetics_ruby/vendor/phonetics/src/confusion.rs
+- ext/phonetics_ruby/vendor/phonetics/src/consonants.rs
+- ext/phonetics_ruby/vendor/phonetics/src/cross_class.rs
+- ext/phonetics_ruby/vendor/phonetics/src/diacritics.rs
+- ext/phonetics_ruby/vendor/phonetics/src/distance.rs
+- ext/phonetics_ruby/vendor/phonetics/src/levenshtein.rs
+- ext/phonetics_ruby/vendor/phonetics/src/lib.rs
+- ext/phonetics_ruby/vendor/phonetics/src/symbols.rs
+- ext/phonetics_ruby/vendor/phonetics/src/tokenizer.rs
+- ext/phonetics_ruby/vendor/phonetics/src/vowels.rs
 - lib/common_ipa_transcriptions.json
 - lib/phonetics.rb
-- lib/phonetics/code_generator.rb
-- lib/phonetics/distances.rb
-- lib/phonetics/levenshtein.rb
-- lib/phonetics/ruby_levenshtein.rb
 - lib/phonetics/transcriptions.rb
 - lib/phonetics/version.rb
 - phonetics.gemspec
 homepage: https://github.com/JackDanger/phonetics
 licenses:
 - MIT
-metadata: {}
-post_install_message:
+metadata:
+  homepage_uri: https://github.com/JackDanger/phonetics
+  source_code_uri: https://github.com/JackDanger/phonetics
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -149,15 +160,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '2.5'
+      version: '3.0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 3.3.11
 requirements: []
-rubygems_version: 3.3.7
-signing_key:
+rubygems_version: 3.5.22
+signing_key:
 specification_version: 4
-summary: tools for linguistic code using the International Phonetic Alphabet
+summary: 'IPA-based phonetic distance: strict edit distance, listener-confusion distance,
+  and per-phoneme acoustic and perceptual scoring.'
 test_files: []