RubyGems - spellkit - Versions diffs - 0.1.0.pre.1 → 0.1.1 - Mend

spellkit 0.1.0.pre.1 → 0.1.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/README.md +259 -33
data/ext/spellkit/Cargo.lock +0 -57
data/ext/spellkit/Cargo.toml +0 -2
data/ext/spellkit/src/guards.rs +21 -3
data/ext/spellkit/src/lib.rs +213 -75
data/ext/spellkit/src/symspell.rs +115 -30
data/ext/spellkit/target/debug/build/rb-sys-ead65721880de65e/out/bindings-0.9.117-mri-arm64-darwin24-3.3.0.rs +8902 -0
data/ext/spellkit/target/debug/incremental/spellkit-07yduakb6espe/s-hbic3f250f-1cel1lt.lock +0 -0
data/ext/spellkit/target/debug/incremental/spellkit-1d3zzknqc98bj/s-hbic3f250l-011iykk.lock +0 -0
data/ext/spellkit/target/debug/incremental/spellkit-1pt6om2w642b5/s-hbihepi6zy-1r3p88g.lock +0 -0
data/ext/spellkit/target/release/build/clang-sys-523e86284ef4dd76/out/common.rs +355 -0
data/ext/spellkit/target/release/build/clang-sys-523e86284ef4dd76/out/dynamic.rs +276 -0
data/ext/spellkit/target/release/build/clang-sys-523e86284ef4dd76/out/macros.rs +49 -0
data/ext/spellkit/target/release/build/rb-sys-7d03ffe964952311/out/bindings-0.9.117-mri-arm64-darwin24-3.3.0.rs +8902 -0
data/lib/spellkit/version.rb +1 -1
data/lib/spellkit.rb +176 -31
metadata +97 -6
data/LICENSE +0 -21

data/ext/spellkit/src/lib.rs CHANGED

@@ -22,6 +22,10 @@ struct CheckerState {
     loaded_at: Option<u64>,
     dictionary_size: usize,
     edit_distance: usize,
+    skipped_malformed: usize,
+    skipped_multiword: usize,
+    skipped_invalid_freq: usize,
+    skipped_duplicates: usize,
 }
 impl CheckerState {
@@ -34,10 +38,60 @@ impl CheckerState {
             loaded_at: None,
             dictionary_size: 0,
             edit_distance: 1,
+            skipped_malformed: 0,
+            skipped_multiword: 0,
+            skipped_invalid_freq: 0,
+            skipped_duplicates: 0,
         }
     }
 }
+// Helper function to correct a single word
+// Returns the corrected word or the original if no correction is appropriate
+fn correct_word(
+    state: &CheckerState,
+    symspell: &SymSpell,
+    word: &str,
+) -> String {
+    // Always check if word is protected
+    let normalized = SymSpell::normalize_word(word);
+    if state.guards.is_protected_normalized(word, &normalized) {
+        return word.to_string();
+    }
+    let suggestions = symspell.suggestions(word, 5);
+    // If exact match exists, return canonical form from dictionary
+    if !suggestions.is_empty() && suggestions[0].distance == 0 {
+        return suggestions[0].term.clone();
+    }
+    // Get original word's frequency (if it exists in dictionary)
+    let original_freq = symspell.get_frequency(word);
+    // Find best correction with frequency threshold
+    for suggestion in &suggestions {
+        if suggestion.distance <= state.edit_distance {
+            // Apply frequency threshold
+            let passes_threshold = match original_freq {
+                // Word not in dictionary: require suggestion frequency >= absolute threshold
+                None => suggestion.frequency as f64 >= state.frequency_threshold,
+                // Word in dictionary: require suggestion frequency >= threshold * original frequency
+                Some(orig_freq) => {
+                    suggestion.frequency as f64 >= state.frequency_threshold * orig_freq as f64
+                }
+            };
+            if passes_threshold {
+                return suggestion.term.clone();
+            }
+        }
+    }
+    // No suggestions passed the threshold
+    word.to_string()
+}
 impl Checker {
     fn new() -> Self {
         Self {
@@ -54,56 +108,129 @@ impl Checker {
                 .map_err(|_| Error::new(ruby.exception_arg_error(), "dictionary_path is required"))?
         )?;
-        let content = std::fs::read_to_string(&dictionary_path)
-            .map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Failed to read dictionary file: {}", e)))?;
+        // Optional: edit distance
+        let edit_dist: usize = config.get("edit_distance")
+            .and_then(|v: Value| TryConvert::try_convert(v).ok())
+            .unwrap_or(1);
-    // Optional: edit distance
-    let edit_dist: usize = config.get("edit_distance")
-        .and_then(|v: Value| TryConvert::try_convert(v).ok())
-        .unwrap_or(1);
+        if edit_dist > 2 {
+            return Err(Error::new(ruby.exception_arg_error(), "edit_distance must be 1 or 2"));
+        }
-    if edit_dist > 2 {
-        return Err(Error::new(ruby.exception_arg_error(), "edit_distance must be 1 or 2"));
-    }
+        // Stream dictionary loading: read line-by-line and add directly to SymSpell
+        // This avoids buffering the entire file and intermediate Vec allocation
+        let file = std::fs::File::open(&dictionary_path)
+            .map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Failed to open dictionary file: {}", e)))?;
+        let reader = std::io::BufReader::new(file);
+        let mut symspell = SymSpell::new(edit_dist);
+        let mut dictionary_size = 0;
+        let mut skipped_malformed = 0;
+        let mut skipped_multiword = 0;
+        let mut skipped_invalid_freq = 0;
+        let mut skipped_duplicates = 0;
+        use std::io::BufRead;
+        for line in reader.lines() {
+            let line = line.map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Failed to read line: {}", e)))?;
+            // Try tab-separated first (allows multi-word terms), then space-separated (SymSpell format)
+            let parts: Vec<&str> = if line.contains('\t') {
+                line.split('\t').collect()
+            } else {
+                line.split_whitespace().collect()
+            };
+            // Validate we have exactly 2 columns (term and frequency)
+            if parts.len() != 2 {
+                skipped_malformed += 1;
+                continue;
+            }
+            let term = parts[0].trim();
+            let freq_str = parts[1].trim();
-    let mut words = Vec::new();
-    for line in content.lines() {
-        let parts: Vec<&str> = line.split_whitespace().collect();
-        if parts.len() == 2 {
-            if let Ok(freq) = parts[1].parse::<u64>() {
-                words.push((parts[0].to_string(), freq));
+            // Skip empty terms or frequencies
+            if term.is_empty() || freq_str.is_empty() {
+                skipped_malformed += 1;
+                continue;
             }
-        }
-    }
-    let dictionary_size = words.len();
-    let mut symspell = SymSpell::new(edit_dist);
-    symspell.load_dictionary(words);
+            // Check for multi-word terms (SymSpell algorithm doesn't support phrases)
+            if term.contains(char::is_whitespace) {
+                skipped_multiword += 1;
+                continue;
+            }
-    let mut guards = Guards::new();
+            // Parse frequency
+            match freq_str.parse::<u64>() {
+                Ok(freq) => {
+                    let normalized = SymSpell::normalize_word(term);
+                    let was_new = symspell.add_word(&normalized, term, freq);
+                    if was_new {
+                        dictionary_size += 1;
+                    } else {
+                        skipped_duplicates += 1;
+                    }
+                }
+                Err(_) => {
+                    skipped_invalid_freq += 1;
+                }
+            }
+        }
-    // Load optional protected terms file
-    if let Some(protected_path) = config.get("protected_path") {
-        let path: String = TryConvert::try_convert(protected_path)?;
-        if let Ok(content) = std::fs::read_to_string(path) {
+        let mut guards = Guards::new();
+        // Load optional protected terms file
+        if let Some(protected_path) = config.get("protected_path") {
+            let path: String = TryConvert::try_convert(protected_path)?;
+            let content = std::fs::read_to_string(&path)
+                .map_err(|e| Error::new(ruby.exception_runtime_error(),
+                    format!("Failed to read protected terms file '{}': {}", path, e)))?;
             guards.load_protected(&content);
         }
-    }
-    // Load optional protected patterns
-    if let Some(patterns_value) = config.get("protected_patterns") {
-        let patterns: RArray = TryConvert::try_convert(patterns_value)?;
-        for pattern_value in patterns.into_iter() {
-            let pattern: String = TryConvert::try_convert(pattern_value)?;
-            guards.add_pattern(&pattern)
-                .map_err(|e| Error::new(ruby.exception_arg_error(), e))?;
+        // Load optional protected patterns
+        if let Some(patterns_value) = config.get("protected_patterns") {
+            let patterns: RArray = TryConvert::try_convert(patterns_value)?;
+            for pattern_value in patterns.into_iter() {
+                let pattern_hash: RHash = TryConvert::try_convert(pattern_value)?;
+                let source: String = TryConvert::try_convert(
+                    pattern_hash.fetch::<_, Value>("source")
+                        .map_err(|_| Error::new(ruby.exception_arg_error(), "pattern hash missing 'source' key"))?
+                )?;
+                let case_insensitive: bool = pattern_hash.get("case_insensitive")
+                    .and_then(|v: Value| TryConvert::try_convert(v).ok())
+                    .unwrap_or(false);
+                let multiline: bool = pattern_hash.get("multiline")
+                    .and_then(|v: Value| TryConvert::try_convert(v).ok())
+                    .unwrap_or(false);
+                let extended: bool = pattern_hash.get("extended")
+                    .and_then(|v: Value| TryConvert::try_convert(v).ok())
+                    .unwrap_or(false);
+                guards.add_pattern_with_flags(&source, case_insensitive, multiline, extended)
+                    .map_err(|e| Error::new(ruby.exception_arg_error(), e))?;
+            }
         }
-    }
-    // Optional frequency threshold
-    let frequency_threshold: f64 = config.get("frequency_threshold")
-        .and_then(|v: Value| TryConvert::try_convert(v).ok())
-        .unwrap_or(10.0);
+        // Optional frequency threshold
+        let frequency_threshold: f64 = config.get("frequency_threshold")
+            .and_then(|v: Value| TryConvert::try_convert(v).ok())
+            .unwrap_or(10.0);
+        // Validate frequency threshold
+        if !frequency_threshold.is_finite() {
+            return Err(Error::new(ruby.exception_arg_error(), "frequency_threshold must be finite (not NaN or Infinity)"));
+        }
+        if frequency_threshold < 0.0 {
+            return Err(Error::new(ruby.exception_arg_error(), format!("frequency_threshold must be non-negative, got: {}", frequency_threshold)));
+        }
         let loaded_at = SystemTime::now()
             .duration_since(UNIX_EPOCH)
@@ -118,11 +245,15 @@ impl Checker {
         state.loaded_at = loaded_at;
         state.dictionary_size = dictionary_size;
         state.edit_distance = edit_dist;
+        state.skipped_malformed = skipped_malformed;
+        state.skipped_multiword = skipped_multiword;
+        state.skipped_invalid_freq = skipped_invalid_freq;
+        state.skipped_duplicates = skipped_duplicates;
         Ok(())
     }
-    fn suggest(&self, word: String, max: Option<usize>) -> Result<RArray, Error> {
+    fn suggestions(&self, word: String, max: Option<usize>) -> Result<RArray, Error> {
         let ruby = Ruby::get().unwrap();
         let max_suggestions = max.unwrap_or(5);
         let state = self.state.read().unwrap();
@@ -132,7 +263,7 @@ impl Checker {
         }
         if let Some(ref symspell) = state.symspell {
-            let suggestions = symspell.suggest(&word, max_suggestions);
+            let suggestions = symspell.suggestions(&word, max_suggestions);
             let result = RArray::new();
             for suggestion in suggestions {
@@ -149,7 +280,7 @@ impl Checker {
         }
     }
-    fn correct_if_unknown(&self, word: String, use_guard: Option<bool>) -> Result<String, Error> {
+    fn correct(&self, word: String) -> Result<bool, Error> {
         let ruby = Ruby::get().unwrap();
         let state = self.state.read().unwrap();
@@ -157,49 +288,51 @@ impl Checker {
             return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
         }
-        // Check if word is protected
-        if use_guard.unwrap_or(false) {
-            let normalized = SymSpell::normalize_word(&word);
-            if state.guards.is_protected_normalized(&word, &normalized) {
-                return Ok(word);
-            }
-        }
         if let Some(ref symspell) = state.symspell {
-            let suggestions = symspell.suggest(&word, 5);
+            Ok(symspell.contains(&word))
+        } else {
+            Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
+        }
+    }
-            // If exact match exists, return original
-            if !suggestions.is_empty() && suggestions[0].distance == 0 {
-                return Ok(word);
-            }
+    fn correct_if_unknown(&self, word: String) -> Result<String, Error> {
+        let ruby = Ruby::get().unwrap();
+        let state = self.state.read().unwrap();
-            // Find best correction with frequency gating
-            for suggestion in &suggestions {
-                if suggestion.distance <= 1 {
-                    // Check frequency threshold - correction should be significantly more common
-                    // Since we don't have the original word's frequency, we'll just take any ED=1 match
-                    // In a full implementation, we'd check if suggestion.frequency >= threshold * original_freq
-                    return Ok(suggestion.term.clone());
-                }
-            }
+        if !state.loaded {
+            return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
+        }
-            Ok(word)
+        if let Some(ref symspell) = state.symspell {
+            Ok(correct_word(&state, symspell, &word))
         } else {
             Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
         }
     }
-    fn correct_tokens(&self, tokens: RArray, use_guard: Option<bool>) -> Result<RArray, Error> {
-        let result = RArray::new();
-        let guard = use_guard.unwrap_or(false);
+    fn correct_tokens(&self, tokens: RArray) -> Result<RArray, Error> {
+        // Optimize batch correction by acquiring lock once for all tokens
+        // instead of calling correct_if_unknown per token (which re-locks each time)
+        let ruby = Ruby::get().unwrap();
+        let state = self.state.read().unwrap();
-        for token in tokens.into_iter() {
-            let word: String = TryConvert::try_convert(token)?;
-            let corrected = self.correct_if_unknown(word, Some(guard))?;
-            result.push(corrected)?;
+        if !state.loaded {
+            return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
         }
-        Ok(result)
+        let result = RArray::new();
+        if let Some(ref symspell) = state.symspell {
+            for token in tokens.into_iter() {
+                let word: String = TryConvert::try_convert(token)?;
+                let corrected = correct_word(&state, symspell, &word);
+                result.push(corrected)?;
+            }
+            Ok(result)
+        } else {
+            Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
+        }
     }
     fn stats(&self) -> Result<RHash, Error> {
@@ -214,6 +347,10 @@ impl Checker {
         stats.aset("loaded", true)?;
         stats.aset("dictionary_size", state.dictionary_size)?;
         stats.aset("edit_distance", state.edit_distance)?;
+        stats.aset("skipped_malformed", state.skipped_malformed)?;
+        stats.aset("skipped_multiword", state.skipped_multiword)?;
+        stats.aset("skipped_invalid_freq", state.skipped_invalid_freq)?;
+        stats.aset("skipped_duplicates", state.skipped_duplicates)?;
         if let Some(loaded_at) = state.loaded_at {
             stats.aset("loaded_at", loaded_at)?;
@@ -245,9 +382,10 @@ fn init(_ruby: &Ruby) -> Result<(), Error> {
     checker_class.define_singleton_method("new", function!(Checker::new, 0))?;
     checker_class.define_method("load!", method!(Checker::load_full, 1))?;
-    checker_class.define_method("suggest", method!(Checker::suggest, 2))?;
-    checker_class.define_method("correct_if_unknown", method!(Checker::correct_if_unknown, 2))?;
-    checker_class.define_method("correct_tokens", method!(Checker::correct_tokens, 2))?;
+    checker_class.define_method("suggestions", method!(Checker::suggestions, 2))?;
+    checker_class.define_method("correct?", method!(Checker::correct, 1))?;
+    checker_class.define_method("correct", method!(Checker::correct_if_unknown, 1))?;
+    checker_class.define_method("correct_tokens", method!(Checker::correct_tokens, 1))?;
     checker_class.define_method("stats", method!(Checker::stats, 0))?;
     checker_class.define_method("healthcheck", method!(Checker::healthcheck, 0))?;

data/ext/spellkit/src/symspell.rs CHANGED

@@ -2,6 +2,12 @@ use hashbrown::{HashMap, HashSet};
 use std::cmp::Ordering;
 use unicode_normalization::UnicodeNormalization;
+#[derive(Debug, Clone)]
+pub struct WordEntry {
+    pub canonical: String,
+    pub frequency: u64,
+}
 #[derive(Debug, Clone)]
 pub struct Suggestion {
     pub term: String,
@@ -44,7 +50,7 @@ impl Eq for Suggestion {}
 pub struct SymSpell {
     deletes: HashMap<String, HashSet<String>>,
-    words: HashMap<String, u64>,
+    words: HashMap<String, WordEntry>,
     max_edit_distance: usize,
 }
@@ -64,23 +70,44 @@ impl SymSpell {
             .to_lowercase()
     }
-    pub fn load_dictionary(&mut self, words: Vec<(String, u64)>) {
-        for (word, freq) in words {
-            let normalized = Self::normalize_word(&word);
-            self.add_word(&normalized, freq);
-        }
-    }
+    pub fn add_word(&mut self, normalized: &str, canonical: &str, frequency: u64) -> bool {
+        let normalized_key = normalized.to_string();
-    pub fn add_word(&mut self, word: &str, frequency: u64) {
-        self.words.insert(word.to_string(), frequency);
+        let was_new = if let Some(existing) = self.words.get_mut(&normalized_key) {
+            // Duplicate: sum frequencies and keep highest-frequency canonical form
+            let new_total_freq = existing.frequency + frequency;
+            // Keep the canonical form from the higher-frequency variant
+            if frequency > existing.frequency {
+                existing.canonical = canonical.to_string();
+            }
-        let deletes = self.get_deletes(word, self.max_edit_distance);
-        for delete in deletes {
-            self.deletes
-                .entry(delete)
-                .or_insert_with(HashSet::new)
-                .insert(word.to_string());
+            existing.frequency = new_total_freq;
+            false
+        } else {
+            // New entry
+            self.words.insert(
+                normalized_key.clone(),
+                WordEntry {
+                    canonical: canonical.to_string(),
+                    frequency,
+                },
+            );
+            true
+        };
+        // Only generate deletes for new entries (avoid redundant work)
+        if was_new {
+            let deletes = self.get_deletes(normalized, self.max_edit_distance);
+            for delete in deletes {
+                self.deletes
+                    .entry(delete)
+                    .or_insert_with(HashSet::new)
+                    .insert(normalized_key.clone());
+            }
         }
+        was_new
     }
     fn get_deletes(&self, word: &str, edit_distance: usize) -> HashSet<String> {
@@ -101,8 +128,10 @@ impl SymSpell {
                 processed.insert(item.clone());
                 for delete in self.generate_deletes(&item) {
-                    if delete.len() >= 1 {
-                        deletes.insert(delete.clone());
+                    deletes.insert(delete.clone());
+                    // Only continue processing non-empty strings to avoid infinite loops
+                    if !delete.is_empty() {
                         temp_queue.push(delete);
                     }
                 }
@@ -130,13 +159,23 @@ impl SymSpell {
         deletes
     }
-    pub fn suggest(&self, word: &str, max_suggestions: usize) -> Vec<Suggestion> {
+    pub fn contains(&self, word: &str) -> bool {
+        let normalized = Self::normalize_word(word);
+        self.words.contains_key(&normalized)
+    }
+    pub fn get_frequency(&self, word: &str) -> Option<u64> {
+        let normalized = Self::normalize_word(word);
+        self.words.get(&normalized).map(|entry| entry.frequency)
+    }
+    pub fn suggestions(&self, word: &str, max_suggestions: usize) -> Vec<Suggestion> {
         let normalized = Self::normalize_word(word);
         let mut suggestions = Vec::new();
         let mut seen = HashSet::new();
-        if let Some(&freq) = self.words.get(&normalized) {
-            suggestions.push(Suggestion::new(normalized.clone(), 0, freq));
+        if let Some(entry) = self.words.get(&normalized) {
+            suggestions.push(Suggestion::new(entry.canonical.clone(), 0, entry.frequency));
             seen.insert(normalized.clone());
         }
@@ -145,10 +184,10 @@ impl SymSpell {
         for delete in &input_deletes {
             // Check if this delete is itself a dictionary word (important for finding words shorter than input)
             if !seen.contains(delete) {
-                if let Some(&freq) = self.words.get(delete) {
+                if let Some(entry) = self.words.get(delete) {
                     let distance = self.edit_distance(&normalized, delete);
                     if distance <= self.max_edit_distance {
-                        suggestions.push(Suggestion::new(delete.clone(), distance, freq));
+                        suggestions.push(Suggestion::new(entry.canonical.clone(), distance, entry.frequency));
                         seen.insert(delete.clone());
                     }
                 }
@@ -163,8 +202,8 @@ impl SymSpell {
                     let distance = self.edit_distance(&normalized, candidate);
                     if distance <= self.max_edit_distance {
-                        if let Some(&freq) = self.words.get(candidate) {
-                            suggestions.push(Suggestion::new(candidate.clone(), distance, freq));
+                        if let Some(entry) = self.words.get(candidate) {
+                            suggestions.push(Suggestion::new(entry.canonical.clone(), distance, entry.frequency));
                             seen.insert(candidate.clone());
                         }
                     }
@@ -180,8 +219,8 @@ impl SymSpell {
                 let distance = self.edit_distance(&normalized, candidate);
                 if distance <= self.max_edit_distance {
-                    if let Some(&freq) = self.words.get(candidate) {
-                        suggestions.push(Suggestion::new(candidate.clone(), distance, freq));
+                    if let Some(entry) = self.words.get(candidate) {
+                        suggestions.push(Suggestion::new(entry.canonical.clone(), distance, entry.frequency));
                         seen.insert(candidate.clone());
                     }
                 }
@@ -252,13 +291,59 @@ mod tests {
     #[test]
     fn test_suggestions() {
         let mut symspell = SymSpell::new(2);
-        symspell.add_word("hello", 1000);
-        symspell.add_word("hell", 500);
-        symspell.add_word("help", 750);
+        symspell.add_word("hello", "hello", 1000);
+        symspell.add_word("hell", "hell", 500);
+        symspell.add_word("help", "help", 750);
-        let suggestions = symspell.suggest("helo", 3);
+        let suggestions = symspell.suggestions("helo", 3);
         assert!(!suggestions.is_empty());
         assert_eq!(suggestions[0].term, "hello");
         assert_eq!(suggestions[0].distance, 1);
     }
+    #[test]
+    fn test_single_character_corrections() {
+        let mut symspell = SymSpell::new(1);
+        symspell.add_word("a", "a", 10000);
+        symspell.add_word("i", "I", 8000);
+        symspell.add_word("o", "o", 6000);
+        let suggestions = symspell.suggestions("x", 5);
+        assert!(!suggestions.is_empty(), "Single-character corrections should work");
+        assert!(suggestions.iter().any(|s| s.term == "a"), "Should suggest 'a' for 'x'");
+        let suggestions_for_j = symspell.suggestions("j", 5);
+        assert!(!suggestions_for_j.is_empty(), "Should find suggestions for 'j'");
+        assert!(suggestions_for_j.iter().any(|s| s.term == "I"), "Should suggest canonical 'I' (not 'i')");
+    }
+    #[test]
+    fn test_duplicate_entries_keep_highest_frequency_canonical() {
+        let mut symspell = SymSpell::new(1);
+        // Add high-frequency lowercase variant
+        symspell.add_word("hello", "hello", 10000);
+        // Add low-frequency uppercase variant (should not replace canonical)
+        symspell.add_word("hello", "HELLO", 100);
+        let suggestions = symspell.suggestions("hello", 1);
+        assert_eq!(suggestions.len(), 1);
+        assert_eq!(suggestions[0].term, "hello", "Should keep high-frequency 'hello' as canonical, not 'HELLO'");
+        assert_eq!(suggestions[0].frequency, 10100, "Should sum frequencies: 10000 + 100 = 10100");
+        // Verify reverse order also works
+        let mut symspell2 = SymSpell::new(1);
+        // Add low-frequency first
+        symspell2.add_word("world", "WORLD", 100);
+        // Add high-frequency second (should replace canonical)
+        symspell2.add_word("world", "world", 10000);
+        let suggestions2 = symspell2.suggestions("world", 1);
+        assert_eq!(suggestions2.len(), 1);
+        assert_eq!(suggestions2[0].term, "world", "Should update to high-frequency 'world' canonical");
+        assert_eq!(suggestions2[0].frequency, 10100, "Should sum frequencies");
+    }
 }