spellkit 0.2.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/spellkit.rb ADDED
@@ -0,0 +1,368 @@
1
+ require_relative "spellkit/version"
2
+ require "uri"
3
+ require "net/http"
4
+ require "openssl"
5
+ require "fileutils"
6
+
7
# Native extension bootstrap.
#
# Precompiled (platform) gems carry one binary per Ruby ABI under
# lib/spellkit/<major.minor>/spellkit.{so,bundle}, which lets a single fat gem
# serve several Ruby versions. Source and development builds instead produce a
# flat lib/spellkit/spellkit.{so,bundle}. The ABI-specific location is tried
# first; the flat location is the fallback.
#
# Both lookups use `require` (resolved through $LOAD_PATH) and never
# `require_relative`, because RubyGems installs native extensions outside the
# gem's lib/ directory.
begin
  require "spellkit/#{RUBY_VERSION[/\d+\.\d+/]}/spellkit"
rescue LoadError
  require "spellkit/spellkit"
end
19
+
20
# Namespace for the SpellKit spell checker.
#
# Besides the error classes and the Configuration value object, the module
# itself acts as a facade over one process-wide "default" Checker: the
# singleton methods below create, replace and delegate to that instance.
module SpellKit
  # Root of the SpellKit exception hierarchy.
  class Error < StandardError; end

  # NOTE(review): never raised from this file; presumably signalled by the
  # native extension when a checker is used before load! -- confirm.
  class NotLoadedError < Error; end

  # Raised by Checker#load! when the dictionary path does not exist.
  class FileNotFoundError < Error; end

  # Raised when an argument fails validation (nil word, bad edit distance, ...).
  class InvalidArgumentError < Error; end

  # Raised when a dictionary URL cannot be downloaded.
  class DownloadError < Error; end

  # Default dictionary: SymSpell English 80k frequency dictionary.
  # Frozen so that handing it out through Configuration#dictionary cannot
  # result in the shared constant being mutated in place.
  DEFAULT_DICTIONARY_URL = "https://raw.githubusercontent.com/wolfgarbe/SymSpell/master/SymSpell.FrequencyDictionary/en-80k.txt".freeze

  # Mutable bag of settings yielded by SpellKit.configure. Its #to_h is
  # splatted into Checker#load! as keyword arguments.
  class Configuration
    attr_accessor :dictionary, :protected_path, :protected_patterns, :edit_distance, :frequency_threshold

    # Starts from the defaults: the bundled dictionary URL, no protected terms,
    # edit distance 1 and frequency threshold 10.0.
    def initialize
      @dictionary = DEFAULT_DICTIONARY_URL
      @protected_path = nil
      @protected_patterns = []
      @edit_distance = 1
      @frequency_threshold = 10.0
    end

    # Returns the settings as a Hash keyed by the keyword names that
    # Checker#load! accepts.
    def to_h
      {
        dictionary: @dictionary,
        protected_path: @protected_path,
        protected_patterns: @protected_patterns,
        edit_distance: @edit_distance,
        frequency_threshold: @frequency_threshold
      }
    end
  end

  class << self
    # Allows callers to install (or clear, with nil) the default checker.
    attr_writer :default

    # Builds a Configuration, yields it for customization, then loads a new
    # checker from it and installs that checker as the default.
    #
    # Returns the newly loaded Checker. Any error raised by the block or by
    # Checker#load! propagates, and in that case the previously installed
    # default (if any) is left untouched.
    def configure
      config = Configuration.new
      yield(config)
      install_default(**config.to_h)
    end

    # Returns the default checker, lazily creating one loaded with
    # DEFAULT_DICTIONARY_URL the first time it is needed. If that load raises,
    # nothing is memoized and the next call tries again.
    def default
      @default ||= Checker.new.tap { |checker| checker.load!(dictionary: DEFAULT_DICTIONARY_URL) }
    end

    # Loads a new checker with the given Checker#load! options and installs it
    # as the default. Returns the checker; on failure the error propagates and
    # the previous default is kept.
    def load!(**options)
      install_default(**options)
    end

    # The methods below forward to the default checker unchanged.

    def suggestions(word, max = 5)
      default.suggestions(word, max)
    end

    def correct?(word)
      default.correct?(word)
    end

    def correct(word)
      default.correct(word)
    end

    def correct_tokens(tokens)
      default.correct_tokens(tokens)
    end

    def stats
      default.stats
    end

    def healthcheck
      default.healthcheck
    end

    private

    # Creates and loads a checker, publishing it as the default only once
    # load! has returned. Assigning before loading would leave an unloaded
    # checker installed after a failure, which `default` (using ||=) would then
    # keep returning instead of recovering.
    def install_default(**options)
      checker = Checker.new
      checker.load!(**options)
      @default = checker
    end
  end
end
103
+
104
+ # Reopen Rust-defined Checker class to add Ruby wrappers
105
# Ruby-level wrappers for the Rust-defined SpellKit::Checker.
#
# The native methods stay reachable under _rust_* aliases. The definitions
# below replace the public names with versions that validate arguments,
# resolve dictionary URLs to a locally cached file, and translate Ruby
# patterns into the hash form handed to the native loader.
class SpellKit::Checker
  # Capture the native implementations before they are redefined below.
  alias_method :_rust_load!, :load!
  alias_method :_rust_suggestions, :suggestions
  alias_method :_rust_correct?, :correct?
  alias_method :_rust_correct, :correct
  alias_method :_rust_correct_tokens, :correct_tokens
  alias_method :_rust_stats, :stats
  alias_method :_rust_healthcheck, :healthcheck

  # Validates the options, builds the configuration hash and passes it to the
  # native loader.
  #
  # dictionary          - local path or http(s):// URL (required). URLs are
  #                       downloaded once and cached under ~/.cache/spellkit.
  # protected_path      - optional; forwarded (as a String) when given.
  # protected_patterns  - Array of Regexp and/or String.
  # edit_distance       - 1 or 2.
  # frequency_threshold - finite, non-negative Numeric.
  # skip_*              - booleans that prepend built-in protected patterns
  #                       (see build_skip_patterns).
  # Unknown keywords are accepted and ignored.
  #
  # Returns self.
  # Raises SpellKit::InvalidArgumentError for invalid options,
  # SpellKit::FileNotFoundError when the dictionary path does not exist, and
  # SpellKit::DownloadError when a dictionary URL cannot be fetched.
  def load!(dictionary: nil, protected_path: nil, protected_patterns: [],
    edit_distance: 1, frequency_threshold: 10.0,
    skip_urls: false, skip_emails: false, skip_hostnames: false,
    skip_code_patterns: false, skip_numbers: false, **_options)
    raise SpellKit::InvalidArgumentError, "dictionary parameter is required" if dictionary.nil?

    dictionary_path = resolve_dictionary_path(dictionary)
    unless File.exist?(dictionary_path)
      raise SpellKit::FileNotFoundError, "Dictionary file not found: #{dictionary_path}"
    end

    validate_load_options!(edit_distance, protected_patterns, frequency_threshold)

    skip_patterns = build_skip_patterns(
      skip_urls: skip_urls,
      skip_emails: skip_emails,
      skip_hostnames: skip_hostnames,
      skip_code_patterns: skip_code_patterns,
      skip_numbers: skip_numbers
    )

    # Built-in skip patterns come first, followed by the caller's patterns.
    all_patterns = skip_patterns + protected_patterns

    config = {
      "dictionary_path" => dictionary_path,
      "edit_distance" => edit_distance,
      "frequency_threshold" => frequency_threshold
    }
    config["protected_path"] = protected_path.to_s if protected_path

    # `empty?` rather than `any?`: a list such as [nil] must reach the
    # per-element validation instead of being silently skipped.
    unless all_patterns.empty?
      config["protected_patterns"] = all_patterns.map { |pattern| serialize_pattern(pattern) }
    end

    _rust_load!(config)
    self
  end

  # Forwards to the native suggestions call after rejecting nil/empty words.
  # `max` is passed through unchanged (default 5).
  def suggestions(word, max = 5)
    validate_word!(word)
    _rust_suggestions(word, max)
  end

  # Forwards to the native correct? call after rejecting nil/empty words.
  def correct?(word)
    validate_word!(word)
    _rust_correct?(word)
  end

  # Forwards to the native correct call after rejecting nil/empty words.
  def correct(word)
    validate_word!(word)
    _rust_correct(word)
  end

  # Forwards an Array of tokens to the native correct_tokens call.
  # Raises SpellKit::InvalidArgumentError when tokens is not an Array.
  def correct_tokens(tokens)
    raise SpellKit::InvalidArgumentError, "tokens must be an Array" unless tokens.is_a?(Array)

    _rust_correct_tokens(tokens)
  end

  # Pass-through to the native stats call.
  def stats
    _rust_stats
  end

  # Pass-through to the native healthcheck call.
  def healthcheck
    _rust_healthcheck
  end

  private

  # Shared guard for the single-word entry points.
  def validate_word!(word)
    raise SpellKit::InvalidArgumentError, "word cannot be nil" if word.nil?
    raise SpellKit::InvalidArgumentError, "word cannot be empty" if word.to_s.empty?
  end

  # Returns a local file path for `dictionary`: http(s) URLs are downloaded
  # (or served from the cache), anything else is treated as a path. The String
  # form is what gets downloaded, so URI objects work as well as Strings.
  def resolve_dictionary_path(dictionary)
    location = dictionary.to_s
    return download_dictionary(location) if location.start_with?("http://", "https://")

    location
  end

  # Checks the scalar load! options, raising SpellKit::InvalidArgumentError on
  # the first violation.
  def validate_load_options!(edit_distance, protected_patterns, frequency_threshold)
    unless [1, 2].include?(edit_distance)
      raise SpellKit::InvalidArgumentError, "edit_distance must be 1 or 2, got: #{edit_distance}"
    end

    unless protected_patterns.is_a?(Array)
      raise SpellKit::InvalidArgumentError, "protected_patterns must be an Array"
    end

    unless frequency_threshold.is_a?(Numeric)
      raise SpellKit::InvalidArgumentError, "frequency_threshold must be a number, got: #{frequency_threshold.class}"
    end

    unless frequency_threshold.finite?
      raise SpellKit::InvalidArgumentError, "frequency_threshold must be finite (got NaN or Infinity)"
    end

    if frequency_threshold < 0
      raise SpellKit::InvalidArgumentError, "frequency_threshold must be non-negative, got: #{frequency_threshold}"
    end
  end

  # Converts one protected pattern into the string-keyed hash passed to the
  # native loader. Regexp flags are read from the Regexp#options bitmask;
  # plain Strings are case-sensitive with no flags set.
  def serialize_pattern(pattern)
    case pattern
    when Regexp
      flags = pattern.options
      {
        "source" => pattern.source,
        "case_insensitive" => (flags & Regexp::IGNORECASE) != 0,
        "multiline" => (flags & Regexp::MULTILINE) != 0,
        "extended" => (flags & Regexp::EXTENDED) != 0
      }
    when String
      {
        "source" => pattern,
        "case_insensitive" => false,
        "multiline" => false,
        "extended" => false
      }
    else
      raise SpellKit::InvalidArgumentError, "protected_patterns must contain Regexp or String objects"
    end
  end

  # Returns the built-in protected patterns selected by the skip_* flags, in a
  # fixed order: URLs, emails, hostnames, code identifiers, numbers.
  def build_skip_patterns(skip_urls:, skip_emails:, skip_hostnames:, skip_code_patterns:, skip_numbers:)
    patterns = []

    # Priority 1: URLs, emails, hostnames
    if skip_urls
      # http:// or https:// URLs
      patterns << /^https?:\/\/[^\s]+$/i
      # www. URLs
      patterns << /^www\.[^\s]+$/i
    end

    if skip_emails
      # Email addresses: user@domain.com, user+tag@domain.co.uk
      patterns << /^[\w.+-]+@[\w.-]+\.\w+$/i
    end

    if skip_hostnames
      # Hostnames: example.com, sub.example.com, my-site.co.uk.
      # Requires at least one dot; the final label is not checked against any
      # list of real TLDs.
      patterns << /^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$/i
    end

    # Priority 2: code patterns
    if skip_code_patterns
      # camelCase: starts lowercase, has an uppercase letter (arrayMap, getElementById)
      patterns << /^[a-z]+[A-Z][a-zA-Z0-9]*$/
      # PascalCase: starts uppercase, mixed case (ArrayList, MyClass)
      patterns << /^[A-Z][a-z]+[A-Z][a-zA-Z0-9]*$/
      # snake_case, matched case-insensitively (my_function, API_KEY)
      patterns << /^[a-z]+_[a-z0-9_]+$/i
      # SCREAMING_SNAKE_CASE: uppercase with underscores
      patterns << /^[A-Z]+_[A-Z0-9_]+$/
      # dotted.paths: identifier.identifier (Array.map, config.yml)
      patterns << /^[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_.]*$/
    end

    # Priority 3: numeric patterns
    if skip_numbers
      # Version numbers: 1.0, 1.2.3, 1.2.3.4
      patterns << /^\d+\.\d+(\.\d+)?(\.\d+)?$/
      # Hash/IDs: #123, #4567
      patterns << /^#\d+$/
      # Measurements with common units
      # Weight: kg, g, mg, lb, oz
      # Distance: km, m, cm, mm, mi, ft, in
      # Data: gb, mb, kb, tb, pb
      # Screen: px, pt, em, rem
      patterns << /^\d+(\.\d+)?(kg|g|mg|lb|oz|km|m|cm|mm|mi|ft|in|gb|mb|kb|tb|pb|px|pt|em|rem)$/i
      # Any token that begins with a digit (5kg, 123abc)
      patterns << /^\d/
    end

    patterns
  end

  # Downloads `url` into ~/.cache/spellkit (file name derived from a SHA-256 of
  # the URL) and returns the cached file's path. A non-empty cached file is
  # reused without touching the network.
  #
  # Raises SpellKit::InvalidArgumentError for an unparsable URL and
  # SpellKit::DownloadError for timeouts and every other failure.
  def download_dictionary(url)
    require "digest"

    cache_dir = File.join(Dir.home, ".cache", "spellkit")
    FileUtils.mkdir_p(cache_dir)

    url_hash = Digest::SHA256.hexdigest(url)[0..15]
    cache_file = File.join(cache_dir, "dict_#{url_hash}.tsv")

    # File.size? is nil for a missing or zero-byte file, so an empty leftover
    # from an interrupted download is fetched again instead of being trusted.
    return cache_file if File.size?(cache_file)

    body = fetch_with_redirects(url, max_redirects: 5, open_timeout: 10, read_timeout: 30)
    raise SpellKit::DownloadError, "Empty response body (#{url})" if body.nil? || body.empty?

    write_cache_file(cache_file, body)
    cache_file
  rescue URI::InvalidURIError => e
    raise SpellKit::InvalidArgumentError, "Invalid URL: #{url} (#{e.message})"
  rescue Timeout::Error => e
    raise SpellKit::DownloadError, "Download timed out: #{url} (#{e.message})"
  rescue StandardError => e
    raise SpellKit::DownloadError, "Failed to download dictionary: #{e.message}"
  end

  # Writes `body` next to `cache_file` and renames it into place, so readers
  # never observe a partially written dictionary. The temporary file is
  # removed if anything goes wrong before the rename.
  def write_cache_file(cache_file, body)
    temp_file = "#{cache_file}.#{Process.pid}.#{Thread.current.object_id}.tmp"
    File.binwrite(temp_file, body)
    File.rename(temp_file, cache_file)
  ensure
    FileUtils.rm_f(temp_file) if temp_file
  end

  # Performs a GET on `url` with peer-verified TLS for https and returns the
  # response body, following up to `max_redirects` redirects. `redirect_count`
  # is the number of redirects already followed and is only meant to be set by
  # the recursive call.
  #
  # Raises SpellKit::DownloadError for non-success statuses, bad redirects,
  # unsupported URLs, network and TLS errors; Timeout::Error for connect/read
  # timeouts; URI::InvalidURIError for unparsable URLs.
  def fetch_with_redirects(url, max_redirects: 5, open_timeout: 10, read_timeout: 30, redirect_count: 0)
    raise SpellKit::DownloadError, "Too many redirects (limit: #{max_redirects})" if redirect_count > max_redirects

    uri = URI.parse(url)
    # A Location header is server-controlled: refuse anything that is not a
    # plain http/https URL with a host (URI::HTTPS is a subclass of URI::HTTP).
    unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
      raise SpellKit::DownloadError, "Unsupported URL, only http and https are allowed (#{url})"
    end

    # The block returns the response, so the connection is closed before any
    # redirect is followed instead of staying open for the whole chain.
    response = Net::HTTP.start(uri.host, uri.port,
      use_ssl: uri.scheme == "https",
      open_timeout: open_timeout,
      read_timeout: read_timeout,
      verify_mode: OpenSSL::SSL::VERIFY_PEER) do |http|
      http.request(Net::HTTP::Get.new(uri.request_uri))
    end

    case response
    when Net::HTTPSuccess
      response.body
    when Net::HTTPRedirection
      location = response["location"]
      raise SpellKit::DownloadError, "Redirect missing Location header" if location.nil? || location.empty?

      # Relative redirects are resolved against the URL that produced them.
      redirect_url = URI.parse(location).relative? ? URI.join(url, location).to_s : location

      fetch_with_redirects(redirect_url, max_redirects: max_redirects, open_timeout: open_timeout,
        read_timeout: read_timeout, redirect_count: redirect_count + 1)
    else
      raise SpellKit::DownloadError, "HTTP #{response.code}: #{response.message} (#{url})"
    end
  rescue Net::OpenTimeout
    raise Timeout::Error, "Connection timeout after #{open_timeout}s: #{url}"
  rescue Net::ReadTimeout
    raise Timeout::Error, "Read timeout after #{read_timeout}s: #{url}"
  rescue SocketError => e
    raise SpellKit::DownloadError, "Network error: #{e.message} (#{url})"
  rescue OpenSSL::SSL::SSLError => e
    raise SpellKit::DownloadError, "SSL verification failed: #{e.message} (#{url})"
  end
end
metadata ADDED
@@ -0,0 +1,202 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spellkit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: aarch64-linux
6
+ authors:
7
+ - Chris Petersen
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2026-06-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rb_sys
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '13.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '13.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.2'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: webmock
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: standard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.3'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.3'
111
+ - !ruby/object:Gem::Dependency
112
+ name: irb
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: benchmark-ips
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: ffi-aspell
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: A Ruby gem with a native Rust implementation of the SymSpell algorithm
154
+ for fast typo correction with domain-specific term protection
155
+ email:
156
+ - chris@petersen.io
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - README.md
162
+ - ext/spellkit/Cargo.toml
163
+ - ext/spellkit/extconf.rb
164
+ - ext/spellkit/src/guards.rs
165
+ - ext/spellkit/src/lib.rs
166
+ - ext/spellkit/src/symspell.rs
167
+ - lib/spellkit.rb
168
+ - lib/spellkit/3.1/spellkit.so
169
+ - lib/spellkit/3.2/spellkit.so
170
+ - lib/spellkit/3.3/spellkit.so
171
+ - lib/spellkit/3.4/spellkit.so
172
+ - lib/spellkit/version.rb
173
+ homepage: https://github.com/scientist-labs/spellkit
174
+ licenses:
175
+ - MIT
176
+ metadata:
177
+ homepage_uri: https://github.com/scientist-labs/spellkit
178
+ source_code_uri: https://github.com/scientist-labs/spellkit
179
+ post_install_message:
180
+ rdoc_options: []
181
+ require_paths:
182
+ - lib
183
+ required_ruby_version: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '3.1'
188
+ - - "<"
189
+ - !ruby/object:Gem::Version
190
+ version: 3.5.dev
191
+ required_rubygems_version: !ruby/object:Gem::Requirement
192
+ requirements:
193
+ - - ">="
194
+ - !ruby/object:Gem::Version
195
+ version: '0'
196
+ requirements:
197
+ - Rust >= 1.85
198
+ rubygems_version: 3.5.23
199
+ signing_key:
200
+ specification_version: 4
201
+ summary: Fast, safe typo correction for search-term extraction
202
+ test_files: []