spellkit 0.1.0.pre.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +448 -0
- data/ext/spellkit/Cargo.lock +402 -0
- data/ext/spellkit/Cargo.toml +21 -0
- data/ext/spellkit/extconf.rb +4 -0
- data/ext/spellkit/src/guards.rs +57 -0
- data/ext/spellkit/src/lib.rs +255 -0
- data/ext/spellkit/src/symspell.rs +264 -0
- data/ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/common.rs +355 -0
- data/ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/dynamic.rs +276 -0
- data/ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/macros.rs +49 -0
- data/ext/spellkit/target/debug/build/rb-sys-4cf7db3819c4a6ed/out/bindings-0.9.117-mri-arm64-darwin24-3.3.0.rs +8902 -0
- data/ext/spellkit/target/debug/build/serde-b1b39c86cf577219/out/private.rs +6 -0
- data/ext/spellkit/target/debug/build/serde_core-7a7752261f0e4007/out/private.rs +5 -0
- data/ext/spellkit/target/debug/incremental/spellkit-10n1yon0n2c8v/s-hbha7isu2i-02ly2uq.lock +0 -0
- data/ext/spellkit/target/debug/incremental/spellkit-2jusczkp089xp/s-hbhcyx6yob-0pqrnyt.lock +0 -0
- data/ext/spellkit/target/debug/incremental/spellkit-39nm03wp54lxw/s-hbhcyx6ynq-08lhwc0.lock +0 -0
- data/lib/spellkit/version.rb +5 -0
- data/lib/spellkit.rb +216 -0
- metadata +123 -0
|
File without changes
|
|
File without changes
|
|
File without changes
|
data/lib/spellkit.rb
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
require_relative "spellkit/version"
|
|
2
|
+
require "uri"
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
|
|
6
|
+
begin
|
|
7
|
+
require "spellkit/spellkit"
|
|
8
|
+
rescue LoadError
|
|
9
|
+
require "spellkit.bundle"
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
# Top-level namespace for the gem: error classes, the Configuration holder and
# module-level convenience methods that delegate to a shared default Checker.
module SpellKit
  # Base class for every SpellKit error, so callers can rescue SpellKit::Error.
  class Error < StandardError; end
  # Not raised anywhere in this file; presumably raised by the native
  # extension when a checker is used before load! -- TODO confirm.
  class NotLoadedError < Error; end
  # Raised by Checker#load! when the dictionary path does not exist.
  class FileNotFoundError < Error; end
  # Raised when an argument fails validation.
  class InvalidArgumentError < Error; end
  # Raised when a dictionary cannot be downloaded.
  class DownloadError < Error; end

  # Default dictionary: SymSpell English 80k frequency dictionary.
  # Frozen because Configuration#initialize hands this very object out as the
  # initial `dictionary` value; in-place mutation must not alter the constant.
  DEFAULT_DICTIONARY_URL = "https://raw.githubusercontent.com/wolfgarbe/SymSpell/master/SymSpell.FrequencyDictionary/en-80k.txt".freeze

  # Mutable bag of settings yielded by SpellKit.configure. Its keys mirror the
  # keyword arguments accepted by Checker#load!.
  class Configuration
    attr_accessor :dictionary, :protected_path, :protected_patterns, :edit_distance, :frequency_threshold

    # Starts from the same defaults Checker#load! uses, except that the
    # dictionary defaults to DEFAULT_DICTIONARY_URL.
    def initialize
      @dictionary = DEFAULT_DICTIONARY_URL
      @protected_path = nil
      @protected_patterns = []
      @edit_distance = 1
      @frequency_threshold = 10.0
    end

    # @return [Hash{Symbol => Object}] the settings, keyed so the hash can be
    #   splatted straight into Checker#load!
    def to_h
      {
        dictionary: @dictionary,
        protected_path: @protected_path,
        protected_patterns: @protected_patterns,
        edit_distance: @edit_distance,
        frequency_threshold: @frequency_threshold
      }
    end
  end

  class << self
    attr_writer :default

    # Yields a fresh Configuration, then builds and loads a Checker from it and
    # installs that checker as the shared default.
    #
    # The default is replaced only after the checker loaded successfully, so a
    # failing configure leaves the previous default (if any) untouched.
    #
    # @yieldparam config [Configuration]
    # @return [Checker] the newly installed default checker
    def configure
      config = Configuration.new
      yield(config)
      @default = build_checker(**config.to_h)
    end

    # @return [Checker] the shared default checker; on first use one is built
    #   and loaded from DEFAULT_DICTIONARY_URL
    def default
      @default ||= build_checker(dictionary: DEFAULT_DICTIONARY_URL)
    end

    # Delegation methods

    # Builds a new Checker from +options+ (forwarded to Checker#load!) and
    # installs it as the shared default. As with configure, the previous
    # default survives if loading raises.
    #
    # @return [Checker] the newly installed default checker
    def load!(**options)
      @default = build_checker(**options)
    end

    # Delegates to the default checker's #suggest.
    def suggest(word, max = 5)
      default.suggest(word, max)
    end

    # Delegates to the default checker's #correct_if_unknown.
    def correct_if_unknown(word, guard: nil)
      default.correct_if_unknown(word, guard: guard)
    end

    # Delegates to the default checker's #correct_tokens.
    def correct_tokens(tokens, guard: nil)
      default.correct_tokens(tokens, guard: guard)
    end

    # Delegates to the default checker's #stats.
    def stats
      default.stats
    end

    # Delegates to the default checker's #healthcheck.
    def healthcheck
      default.healthcheck
    end

    private

    # Creates a Checker and loads it. Returns the checker only when load! did
    # not raise, so callers never store a half-initialised instance.
    def build_checker(**options)
      checker = Checker.new
      checker.load!(**options)
      checker
    end
  end
end
|
|
91
|
+
|
|
92
|
+
# Reopen Rust-defined Checker class to add Ruby wrappers
|
|
93
|
+
class SpellKit::Checker
|
|
94
|
+
# Save original Rust methods
|
|
95
|
+
alias_method :_rust_load!, :load!
|
|
96
|
+
alias_method :_rust_suggest, :suggest
|
|
97
|
+
alias_method :_rust_correct_if_unknown, :correct_if_unknown
|
|
98
|
+
alias_method :_rust_correct_tokens, :correct_tokens
|
|
99
|
+
alias_method :_rust_stats, :stats
|
|
100
|
+
alias_method :_rust_healthcheck, :healthcheck
|
|
101
|
+
|
|
102
|
+
# Validates the options, resolves the dictionary to a local file and hands the
# resulting configuration to the native loader (_rust_load!).
#
# All argument validation happens before the dictionary is resolved, so an
# invalid option never triggers a network download first.
#
# @param dictionary [#to_s] local path, or an http(s) URL which is fetched via
#   download_dictionary
# @param protected_path [#to_s, nil] forwarded as "protected_path" when given
# @param protected_patterns [Array<Regexp, String>] forwarded as pattern
#   source strings (for a Regexp only #source is sent; its flags are not)
# @param edit_distance [Integer] must be 1 or 2
# @param frequency_threshold [Numeric] forwarded unchanged
# @param _options [Hash] any further keywords are accepted and ignored
# @return [self]
# @raise [SpellKit::InvalidArgumentError] for a missing dictionary, an
#   unsupported edit_distance or malformed protected_patterns
# @raise [SpellKit::FileNotFoundError] when the resolved path does not exist
def load!(dictionary: nil, protected_path: nil, protected_patterns: [],
          edit_distance: 1, frequency_threshold: 10.0, **_options)
  raise SpellKit::InvalidArgumentError, "dictionary parameter is required" if dictionary.nil?

  unless [1, 2].include?(edit_distance)
    raise SpellKit::InvalidArgumentError, "edit_distance must be 1 or 2, got: #{edit_distance}"
  end

  unless protected_patterns.is_a?(Array)
    raise SpellKit::InvalidArgumentError, "protected_patterns must be an Array"
  end

  # Convert Ruby Regexp objects to strings for Rust
  pattern_strings = protected_patterns.map do |pattern|
    case pattern
    when Regexp then pattern.source
    when String then pattern
    else
      raise SpellKit::InvalidArgumentError, "protected_patterns must contain Regexp or String objects"
    end
  end

  # Auto-detect URL vs path. The string form is used for both the test and the
  # download so non-String inputs (e.g. a URI object) behave consistently.
  source = dictionary.to_s
  dictionary_path = source.start_with?("http://", "https://") ? download_dictionary(source) : source

  unless File.exist?(dictionary_path)
    raise SpellKit::FileNotFoundError, "Dictionary file not found: #{dictionary_path}"
  end

  config = {
    "dictionary_path" => dictionary_path,
    "edit_distance" => edit_distance,
    "frequency_threshold" => frequency_threshold
  }
  config["protected_path"] = protected_path.to_s if protected_path
  config["protected_patterns"] = pattern_strings unless pattern_strings.empty?

  _rust_load!(config)
  self
end
|
|
153
|
+
|
|
154
|
+
# Ruby-side guard in front of the native suggestion lookup.
#
# @param word [Object] rejected when nil or when its #to_s is empty
# @param max [Integer] forwarded unchanged to the native method (default 5)
# @return whatever the native _rust_suggest returns
# @raise [SpellKit::InvalidArgumentError] for a nil or empty word
def suggest(word, max = 5)
  problem =
    if word.nil?
      "word cannot be nil"
    elsif word.to_s.empty?
      "word cannot be empty"
    end
  raise SpellKit::InvalidArgumentError, problem if problem

  _rust_suggest(word, max)
end
|
|
160
|
+
|
|
161
|
+
# Ruby-side guard in front of the native single-word correction.
#
# @param word [Object] rejected when nil or when its #to_s is empty
# @param guard [Object, nil] the native call receives true only when this
#   equals :domain; any other value is sent as false
# @return whatever the native _rust_correct_if_unknown returns
# @raise [SpellKit::InvalidArgumentError] for a nil or empty word
def correct_if_unknown(word, guard: nil)
  problem =
    if word.nil?
      "word cannot be nil"
    elsif word.to_s.empty?
      "word cannot be empty"
    end
  raise SpellKit::InvalidArgumentError, problem if problem

  _rust_correct_if_unknown(word, guard == :domain)
end
|
|
168
|
+
|
|
169
|
+
# Ruby-side guard in front of the native batch correction.
#
# @param tokens [Array] anything that is not an Array is rejected
# @param guard [Object, nil] the native call receives true only when this
#   equals :domain; any other value is sent as false
# @return whatever the native _rust_correct_tokens returns
# @raise [SpellKit::InvalidArgumentError] when tokens is not an Array
def correct_tokens(tokens, guard: nil)
  unless tokens.is_a?(Array)
    raise SpellKit::InvalidArgumentError, "tokens must be an Array"
  end

  _rust_correct_tokens(tokens, guard == :domain)
end
|
|
175
|
+
|
|
176
|
+
# Pass-through to the aliased native implementation; adds no Ruby-side logic.
#
# @return whatever the native _rust_stats returns
def stats = _rust_stats
|
|
179
|
+
|
|
180
|
+
# Pass-through to the aliased native implementation; adds no Ruby-side logic.
#
# @return whatever the native _rust_healthcheck returns
def healthcheck = _rust_healthcheck
|
|
183
|
+
|
|
184
|
+
private
|
|
185
|
+
|
|
186
|
+
# Fetches the dictionary at +url+ into ~/.cache/spellkit and returns the path
# of the cached copy. A file already present in the cache is reused without
# touching the network.
#
# @param url [String] http(s) URL of the dictionary
# @return [String] path of the cached dictionary file
# @raise [SpellKit::InvalidArgumentError] when +url+ cannot be parsed
# @raise [SpellKit::DownloadError] for a non-success HTTP response or any
#   other failure while fetching or caching
def download_dictionary(url)
  require "digest"

  # Create cache directory
  cache_dir = File.join(Dir.home, ".cache", "spellkit")
  FileUtils.mkdir_p(cache_dir)

  # Cache filename is derived from the first 16 hex digits of the URL's SHA-256
  url_hash = Digest::SHA256.hexdigest(url)[0..15]
  cache_file = File.join(cache_dir, "dict_#{url_hash}.tsv")

  # Return cached file if it exists
  return cache_file if File.exist?(cache_file)

  # Download dictionary
  uri = URI.parse(url)
  response = Net::HTTP.get_response(uri)

  unless response.is_a?(Net::HTTPSuccess)
    raise SpellKit::DownloadError, "Failed to download dictionary from #{url}: #{response.code} #{response.message}"
  end

  # Write to a sibling temp file and rename it into place, so an interrupted
  # write can never leave a truncated file that later looks like a cache hit.
  partial = "#{cache_file}.#{Process.pid}.part"
  begin
    File.write(partial, response.body)
    File.rename(partial, cache_file)
  ensure
    FileUtils.rm_f(partial) # no-op after a successful rename
  end

  cache_file
rescue URI::InvalidURIError => e
  raise SpellKit::InvalidArgumentError, "Invalid URL: #{url} (#{e.message})"
rescue SpellKit::Error
  # Already a SpellKit error with a precise message (e.g. the HTTP status
  # above); re-raise as-is instead of wrapping it a second time below.
  raise
rescue StandardError => e
  raise SpellKit::DownloadError, "Failed to download dictionary: #{e.message}"
end
|
|
216
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: spellkit
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0.pre.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Chris Petersen
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-09-26 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rb_sys
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0.9'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0.9'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '13.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '13.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rspec
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '3.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '3.0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rake-compiler
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '1.2'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '1.2'
|
|
69
|
+
description: A Ruby gem that provides fast typo correction using SymSpell algorithm,
|
|
70
|
+
with domain-specific term protection
|
|
71
|
+
email:
|
|
72
|
+
- chris@petersen.io
|
|
73
|
+
executables: []
|
|
74
|
+
extensions:
|
|
75
|
+
- ext/spellkit/extconf.rb
|
|
76
|
+
extra_rdoc_files: []
|
|
77
|
+
files:
|
|
78
|
+
- LICENSE
|
|
79
|
+
- README.md
|
|
80
|
+
- ext/spellkit/Cargo.lock
|
|
81
|
+
- ext/spellkit/Cargo.toml
|
|
82
|
+
- ext/spellkit/extconf.rb
|
|
83
|
+
- ext/spellkit/src/guards.rs
|
|
84
|
+
- ext/spellkit/src/lib.rs
|
|
85
|
+
- ext/spellkit/src/symspell.rs
|
|
86
|
+
- ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/common.rs
|
|
87
|
+
- ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/dynamic.rs
|
|
88
|
+
- ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/macros.rs
|
|
89
|
+
- ext/spellkit/target/debug/build/rb-sys-4cf7db3819c4a6ed/out/bindings-0.9.117-mri-arm64-darwin24-3.3.0.rs
|
|
90
|
+
- ext/spellkit/target/debug/build/serde-b1b39c86cf577219/out/private.rs
|
|
91
|
+
- ext/spellkit/target/debug/build/serde_core-7a7752261f0e4007/out/private.rs
|
|
92
|
+
- ext/spellkit/target/debug/incremental/spellkit-10n1yon0n2c8v/s-hbha7isu2i-02ly2uq.lock
|
|
93
|
+
- ext/spellkit/target/debug/incremental/spellkit-2jusczkp089xp/s-hbhcyx6yob-0pqrnyt.lock
|
|
94
|
+
- ext/spellkit/target/debug/incremental/spellkit-39nm03wp54lxw/s-hbhcyx6ynq-08lhwc0.lock
|
|
95
|
+
- lib/spellkit.rb
|
|
96
|
+
- lib/spellkit/version.rb
|
|
97
|
+
homepage: https://github.com/scientist-labs/spellkit
|
|
98
|
+
licenses:
|
|
99
|
+
- MIT
|
|
100
|
+
metadata:
|
|
101
|
+
homepage_uri: https://github.com/scientist-labs/spellkit
|
|
102
|
+
source_code_uri: https://github.com/scientist-labs/spellkit
|
|
103
|
+
changelog_uri: https://github.com/scientist-labs/spellkit/blob/main/CHANGELOG.md
|
|
104
|
+
post_install_message:
|
|
105
|
+
rdoc_options: []
|
|
106
|
+
require_paths:
|
|
107
|
+
- lib
|
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
109
|
+
requirements:
|
|
110
|
+
- - ">="
|
|
111
|
+
- !ruby/object:Gem::Version
|
|
112
|
+
version: 3.1.0
|
|
113
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '0'
|
|
118
|
+
requirements: []
|
|
119
|
+
rubygems_version: 3.5.3
|
|
120
|
+
signing_key:
|
|
121
|
+
specification_version: 4
|
|
122
|
+
summary: Fast, safe typo correction for search-term extraction
|
|
123
|
+
test_files: []
|