spellkit 0.1.0 → 0.1.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df48970adccf2bea2fae1a69f0e3704c0276bac9ab908a1bc5af6835edde0008
4
- data.tar.gz: 28a8f37086361c742f902fa33bfd39003a1fff758b7ee306de1069344cf114d0
3
+ metadata.gz: 6e690fa50208d003679afff3117b6f00664e6938e7feb992eff6a6544fad279e
4
+ data.tar.gz: 17d41414cbd48e093913cfa8765aac063f1cb17283df13414ad801fdf0aa79ae
5
5
  SHA512:
6
- metadata.gz: fcc1a0678ff2714a8844657f883354594d7bfb5be6e76a2708aa6134fa5ef9ef0a407a4be06a5f727e540ab99e6d21bc086a0bc740d3bd6cc2a5ed7c6d9458b0
7
- data.tar.gz: 043b4f1aa59dd2ced9219f11736053d577d8afa7bdc83cb746ab91c4dc4bb1d5fd28c05aee79c09e739a3b1dd89c49ee088b2c189af81148db09ac40e0de1f06
6
+ metadata.gz: '0989bcacde87e9405c99f8674c681be65844a381cac11a00f8f05dd2f1a54312ce9d1799ca6317688255ee5fad454f4670845163b87e6a9541a981fff8685b35'
7
+ data.tar.gz: f12c69803c39ee74083a2090c85aac7bdfe7c39ac81995a95c09ad8c1e70df35cfb8d332de287413accf99aa4b6960e9e0c5f77ccd39b37d4ae092b06d8ea2d4
data/README.md CHANGED
@@ -146,13 +146,13 @@ SpellKit.load!(
146
146
  protected_patterns: [/^[A-Z]{3,4}\d+$/]
147
147
  )
148
148
 
149
- # Use guard: :domain to enable protection
150
- SpellKit.correct("CDK10", guard: :domain)
149
+ # Protected terms are automatically respected
150
+ SpellKit.correct("CDK10")
151
151
  # => "CDK10" # protected, never changed
152
152
 
153
- # Batch correction with guards
153
+ # Batch correction with protection
154
154
  tokens = %w[helllo wrld ABC-123 for CDK10]
155
- SpellKit.correct_tokens(tokens, guard: :domain)
155
+ SpellKit.correct_tokens(tokens)
156
156
  # => ["hello", "world", "ABC-123", "for", "CDK10"]
157
157
  ```
158
158
 
@@ -300,7 +300,7 @@ SpellKit.correct("rarword") # => "rarword" (no correction if freq < 1000)
300
300
 
301
301
  ### Skip Patterns
302
302
 
303
- SpellKit can automatically skip certain patterns to avoid "correcting" technical terms, URLs, and other special content. Inspired by Aspell's filter modes, these patterns are applied when `guard: :domain` is enabled.
303
+ SpellKit can automatically skip certain patterns to avoid "correcting" technical terms, URLs, and other special content. Inspired by Aspell's filter modes, these patterns are automatically applied when configured.
304
304
 
305
305
  **Available skip patterns:**
306
306
 
@@ -315,13 +315,13 @@ SpellKit.load!(
315
315
  )
316
316
 
317
317
  # With skip patterns enabled, technical content is preserved
318
- SpellKit.correct("https://example.com", guard: :domain) # => "https://example.com"
319
- SpellKit.correct("user@test.com", guard: :domain) # => "user@test.com"
320
- SpellKit.correct("getElementById", guard: :domain) # => "getElementById"
321
- SpellKit.correct("version-1.2.3", guard: :domain) # => "version-1.2.3"
318
+ SpellKit.correct("https://example.com") # => "https://example.com"
319
+ SpellKit.correct("user@test.com") # => "user@test.com"
320
+ SpellKit.correct("getElementById") # => "getElementById"
321
+ SpellKit.correct("version-1.2.3") # => "version-1.2.3"
322
322
 
323
323
  # Regular typos are still corrected
324
- SpellKit.correct("helllo", guard: :domain) # => "hello"
324
+ SpellKit.correct("helllo") # => "hello"
325
325
  ```
326
326
 
327
327
  **What each skip pattern matches:**
@@ -351,9 +351,9 @@ SpellKit.load!(
351
351
  protected_patterns: [/^CUSTOM-\d+$/] # Your custom patterns
352
352
  )
353
353
 
354
- # Both work together
355
- SpellKit.correct("https://example.com", guard: :domain) # => "https://example.com" (skip_urls)
356
- SpellKit.correct("CUSTOM-123", guard: :domain) # => "CUSTOM-123" (custom pattern)
354
+ # Both work together automatically
355
+ SpellKit.correct("https://example.com") # => "https://example.com" (skip_urls)
356
+ SpellKit.correct("CUSTOM-123") # => "CUSTOM-123" (custom pattern)
357
357
  ```
358
358
 
359
359
  ## API Reference
@@ -426,33 +426,32 @@ SpellKit.suggestions("helllo", 5)
426
426
  # => [{"term"=>"hello", "distance"=>1, "freq"=>10000}, ...]
427
427
  ```
428
428
 
429
- ### `SpellKit.correct(word, guard:)`
429
+ ### `SpellKit.correct(word)`
430
430
 
431
- Return corrected word or original if no better match found. Respects `frequency_threshold` configuration.
431
+ Return corrected word or original if no better match found. Respects `frequency_threshold` configuration. Protected terms and skip patterns are automatically applied when configured.
432
432
 
433
433
  **Parameters:**
434
434
  - `word` (required) - The word to correct
435
- - `guard:` (optional) - Set to `:domain` to enable protection checks
436
435
 
437
436
  **Behavior:**
438
437
  - Returns original word if it exists in dictionary
439
438
  - For misspellings, only accepts corrections with frequency ≥ `frequency_threshold`
440
439
  - Returns original word if no corrections pass the threshold
441
- - When `guard: :domain` is set, protected terms and skip patterns are applied
440
+ - Automatically respects protected terms and skip patterns configured in `load!`
442
441
 
443
442
  **Example:**
444
443
  ```ruby
445
- SpellKit.correct("helllo") # => "hello"
446
- SpellKit.correct("hello") # => "hello" (already correct)
447
- SpellKit.correct("CDK10", guard: :domain) # => "CDK10" (protected)
444
+ SpellKit.correct("helllo") # => "hello"
445
+ SpellKit.correct("hello") # => "hello" (already correct)
446
+ SpellKit.correct("CDK10") # => "CDK10" (protected if configured)
448
447
  ```
449
448
 
450
- ### `SpellKit.correct_tokens(tokens, guard:)`
449
+ ### `SpellKit.correct_tokens(tokens)`
451
450
 
452
- Batch correction of an array of tokens. Respects `frequency_threshold` configuration.
451
+ Batch correction of an array of tokens. Respects `frequency_threshold` configuration. Protected terms and skip patterns are automatically applied when configured.
453
452
 
454
- **Options:**
455
- - `guard:` - Set to `:domain` to enable protection checks
453
+ **Parameters:**
454
+ - `tokens` (required) - Array of words to correct
456
455
 
457
456
  **Returns:** Array of corrected strings
458
457
 
@@ -472,7 +471,7 @@ Verify system is properly loaded. Raises error if not.
472
471
 
473
472
  ## Term Protection
474
473
 
475
- The `guard: :domain` option enables protection for specific terms:
474
+ When configured, SpellKit automatically protects specific terms from correction:
476
475
 
477
476
  ### Exact Matches
478
477
  Terms in `protected_path` file are never corrected, even if similar dictionary words exist. Matching is case-insensitive, but original casing is preserved in output.
@@ -536,7 +535,7 @@ end
536
535
  class SearchPreprocessor
537
536
  def self.correct_query(text)
538
537
  tokens = text.downcase.split(/\s+/)
539
- SpellKit.correct_tokens(tokens, guard: :domain).join(" ")
538
+ SpellKit.correct_tokens(tokens).join(" ")
540
539
  end
541
540
  end
542
541
  ```
@@ -554,10 +553,10 @@ end
554
553
  - `correct`: 1,858 i/s (538.17 μs/i)
555
554
  - `correct_tokens` (batch): 2,005 i/s (498.76 μs/i)
556
555
 
557
- **Guard Performance:**
558
- - Without guard: 2,926 i/s (341.79 μs/i)
559
- - With guard: 9,337 i/s (107.10 μs/i) - **3.19x faster!**
560
- *(Guards short-circuit expensive lookups)*
556
+ **Protection Performance:**
557
+ - Without protection: 2,926 i/s (341.79 μs/i)
558
+ - With protection: 9,337 i/s (107.10 μs/i) - **3.19x faster!**
559
+ *(Protection checks short-circuit expensive dictionary lookups)*
561
560
 
562
561
  **Latency Distribution (10,000 iterations):**
563
562
  - p50: 61μs
@@ -52,14 +52,11 @@ fn correct_word(
52
52
  state: &CheckerState,
53
53
  symspell: &SymSpell,
54
54
  word: &str,
55
- use_guard: bool,
56
55
  ) -> String {
57
- // Check if word is protected
58
- if use_guard {
59
- let normalized = SymSpell::normalize_word(word);
60
- if state.guards.is_protected_normalized(word, &normalized) {
61
- return word.to_string();
62
- }
56
+ // Always check if word is protected
57
+ let normalized = SymSpell::normalize_word(word);
58
+ if state.guards.is_protected_normalized(word, &normalized) {
59
+ return word.to_string();
63
60
  }
64
61
 
65
62
  let suggestions = symspell.suggestions(word, 5);
@@ -298,7 +295,7 @@ impl Checker {
298
295
  }
299
296
  }
300
297
 
301
- fn correct_if_unknown(&self, word: String, use_guard: Option<bool>) -> Result<String, Error> {
298
+ fn correct_if_unknown(&self, word: String) -> Result<String, Error> {
302
299
  let ruby = Ruby::get().unwrap();
303
300
  let state = self.state.read().unwrap();
304
301
 
@@ -307,18 +304,17 @@ impl Checker {
307
304
  }
308
305
 
309
306
  if let Some(ref symspell) = state.symspell {
310
- Ok(correct_word(&state, symspell, &word, use_guard.unwrap_or(false)))
307
+ Ok(correct_word(&state, symspell, &word))
311
308
  } else {
312
309
  Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
313
310
  }
314
311
  }
315
312
 
316
- fn correct_tokens(&self, tokens: RArray, use_guard: Option<bool>) -> Result<RArray, Error> {
313
+ fn correct_tokens(&self, tokens: RArray) -> Result<RArray, Error> {
317
314
  // Optimize batch correction by acquiring lock once for all tokens
318
315
  // instead of calling correct_if_unknown per token (which re-locks each time)
319
316
  let ruby = Ruby::get().unwrap();
320
317
  let state = self.state.read().unwrap();
321
- let use_guard = use_guard.unwrap_or(false);
322
318
 
323
319
  if !state.loaded {
324
320
  return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
@@ -329,7 +325,7 @@ impl Checker {
329
325
  if let Some(ref symspell) = state.symspell {
330
326
  for token in tokens.into_iter() {
331
327
  let word: String = TryConvert::try_convert(token)?;
332
- let corrected = correct_word(&state, symspell, &word, use_guard);
328
+ let corrected = correct_word(&state, symspell, &word);
333
329
  result.push(corrected)?;
334
330
  }
335
331
 
@@ -388,8 +384,8 @@ fn init(_ruby: &Ruby) -> Result<(), Error> {
388
384
  checker_class.define_method("load!", method!(Checker::load_full, 1))?;
389
385
  checker_class.define_method("suggestions", method!(Checker::suggestions, 2))?;
390
386
  checker_class.define_method("correct?", method!(Checker::correct, 1))?;
391
- checker_class.define_method("correct", method!(Checker::correct_if_unknown, 2))?;
392
- checker_class.define_method("correct_tokens", method!(Checker::correct_tokens, 2))?;
387
+ checker_class.define_method("correct", method!(Checker::correct_if_unknown, 1))?;
388
+ checker_class.define_method("correct_tokens", method!(Checker::correct_tokens, 1))?;
393
389
  checker_class.define_method("stats", method!(Checker::stats, 0))?;
394
390
  checker_class.define_method("healthcheck", method!(Checker::healthcheck, 0))?;
395
391
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpellKit
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
data/lib/spellkit.rb CHANGED
@@ -76,12 +76,12 @@ module SpellKit
76
76
  default.correct?(word)
77
77
  end
78
78
 
79
- def correct(word, guard: nil)
80
- default.correct(word, guard: guard)
79
+ def correct(word)
80
+ default.correct(word)
81
81
  end
82
82
 
83
- def correct_tokens(tokens, guard: nil)
84
- default.correct_tokens(tokens, guard: guard)
83
+ def correct_tokens(tokens)
84
+ default.correct_tokens(tokens)
85
85
  end
86
86
 
87
87
  def stats
@@ -211,19 +211,17 @@ class SpellKit::Checker
211
211
  _rust_correct?(word)
212
212
  end
213
213
 
214
- def correct(word, guard: nil)
214
+ def correct(word)
215
215
  raise SpellKit::InvalidArgumentError, "word cannot be nil" if word.nil?
216
216
  raise SpellKit::InvalidArgumentError, "word cannot be empty" if word.to_s.empty?
217
217
 
218
- use_guard = guard == :domain
219
- _rust_correct(word, use_guard)
218
+ _rust_correct(word)
220
219
  end
221
220
 
222
- def correct_tokens(tokens, guard: nil)
221
+ def correct_tokens(tokens)
223
222
  raise SpellKit::InvalidArgumentError, "tokens must be an Array" unless tokens.is_a?(Array)
224
223
 
225
- use_guard = guard == :domain
226
- _rust_correct_tokens(tokens, use_guard)
224
+ _rust_correct_tokens(tokens)
227
225
  end
228
226
 
229
227
  def stats
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spellkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Petersen