spellkit 0.1.0.pre.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpellKit
4
- VERSION = "0.1.0.pre.1"
4
+ VERSION = "0.1.0"
5
5
  end
data/lib/spellkit.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require_relative "spellkit/version"
2
2
  require "uri"
3
3
  require "net/http"
4
+ require "openssl"
4
5
  require "fileutils"
5
6
 
6
7
  begin
@@ -67,12 +68,16 @@ module SpellKit
67
68
  @default
68
69
  end
69
70
 
70
- def suggest(word, max = 5)
71
- default.suggest(word, max)
71
+ def suggestions(word, max = 5)
72
+ default.suggestions(word, max)
72
73
  end
73
74
 
74
- def correct_if_unknown(word, guard: nil)
75
- default.correct_if_unknown(word, guard: guard)
75
+ def correct?(word)
76
+ default.correct?(word)
77
+ end
78
+
79
+ def correct(word, guard: nil)
80
+ default.correct(word, guard: guard)
76
81
  end
77
82
 
78
83
  def correct_tokens(tokens, guard: nil)
@@ -93,14 +98,17 @@ end
93
98
  class SpellKit::Checker
94
99
  # Save original Rust methods
95
100
  alias_method :_rust_load!, :load!
96
- alias_method :_rust_suggest, :suggest
97
- alias_method :_rust_correct_if_unknown, :correct_if_unknown
101
+ alias_method :_rust_suggestions, :suggestions
102
+ alias_method :_rust_correct?, :correct?
103
+ alias_method :_rust_correct, :correct
98
104
  alias_method :_rust_correct_tokens, :correct_tokens
99
105
  alias_method :_rust_stats, :stats
100
106
  alias_method :_rust_healthcheck, :healthcheck
101
107
 
102
108
  def load!(dictionary: nil, protected_path: nil, protected_patterns: [],
103
- edit_distance: 1, frequency_threshold: 10.0, **_options)
109
+ edit_distance: 1, frequency_threshold: 10.0,
110
+ skip_urls: false, skip_emails: false, skip_hostnames: false,
111
+ skip_code_patterns: false, skip_numbers: false, **_options)
104
112
 
105
113
  # Validate dictionary parameter
106
114
  raise SpellKit::InvalidArgumentError, "dictionary parameter is required" if dictionary.nil?
@@ -125,6 +133,31 @@ class SpellKit::Checker
125
133
  raise SpellKit::InvalidArgumentError, "protected_patterns must be an Array"
126
134
  end
127
135
 
136
+ # Validate frequency_threshold
137
+ unless frequency_threshold.is_a?(Numeric)
138
+ raise SpellKit::InvalidArgumentError, "frequency_threshold must be a number, got: #{frequency_threshold.class}"
139
+ end
140
+
141
+ unless frequency_threshold.finite?
142
+ raise SpellKit::InvalidArgumentError, "frequency_threshold must be finite (got NaN or Infinity)"
143
+ end
144
+
145
+ if frequency_threshold < 0
146
+ raise SpellKit::InvalidArgumentError, "frequency_threshold must be non-negative, got: #{frequency_threshold}"
147
+ end
148
+
149
+ # Build skip patterns from convenience flags
150
+ skip_patterns = build_skip_patterns(
151
+ skip_urls: skip_urls,
152
+ skip_emails: skip_emails,
153
+ skip_hostnames: skip_hostnames,
154
+ skip_code_patterns: skip_code_patterns,
155
+ skip_numbers: skip_numbers
156
+ )
157
+
158
+ # Merge skip patterns with user-provided patterns
159
+ all_patterns = skip_patterns + protected_patterns
160
+
128
161
  config = {
129
162
  "dictionary_path" => dictionary_path,
130
163
  "edit_distance" => edit_distance,
@@ -133,37 +166,57 @@ class SpellKit::Checker
133
166
 
134
167
  config["protected_path"] = protected_path.to_s if protected_path
135
168
 
136
- # Convert Ruby Regex objects to strings for Rust
137
- if protected_patterns.any?
138
- pattern_strings = protected_patterns.map do |pattern|
169
+ # Convert Ruby Regex objects to hashes with flags for Rust
170
+ if all_patterns.any?
171
+ pattern_objects = all_patterns.map do |pattern|
139
172
  if pattern.is_a?(Regexp)
140
- pattern.source
173
+ # Extract flags from Regexp.options bitmask
174
+ options = pattern.options
175
+ {
176
+ "source" => pattern.source,
177
+ "case_insensitive" => (options & Regexp::IGNORECASE) != 0,
178
+ "multiline" => (options & Regexp::MULTILINE) != 0,
179
+ "extended" => (options & Regexp::EXTENDED) != 0
180
+ }
141
181
  elsif pattern.is_a?(String)
142
- pattern
182
+ # Plain strings default to case-sensitive
183
+ {
184
+ "source" => pattern,
185
+ "case_insensitive" => false,
186
+ "multiline" => false,
187
+ "extended" => false
188
+ }
143
189
  else
144
190
  raise SpellKit::InvalidArgumentError, "protected_patterns must contain Regexp or String objects"
145
191
  end
146
192
  end
147
- config["protected_patterns"] = pattern_strings
193
+ config["protected_patterns"] = pattern_objects
148
194
  end
149
195
 
150
196
  _rust_load!(config)
151
197
  self
152
198
  end
153
199
 
154
- def suggest(word, max = 5)
200
+ def suggestions(word, max = 5)
155
201
  raise SpellKit::InvalidArgumentError, "word cannot be nil" if word.nil?
156
202
  raise SpellKit::InvalidArgumentError, "word cannot be empty" if word.to_s.empty?
157
203
 
158
- _rust_suggest(word, max)
204
+ _rust_suggestions(word, max)
159
205
  end
160
206
 
161
- def correct_if_unknown(word, guard: nil)
207
+ def correct?(word)
208
+ raise SpellKit::InvalidArgumentError, "word cannot be nil" if word.nil?
209
+ raise SpellKit::InvalidArgumentError, "word cannot be empty" if word.to_s.empty?
210
+
211
+ _rust_correct?(word)
212
+ end
213
+
214
+ def correct(word, guard: nil)
162
215
  raise SpellKit::InvalidArgumentError, "word cannot be nil" if word.nil?
163
216
  raise SpellKit::InvalidArgumentError, "word cannot be empty" if word.to_s.empty?
164
217
 
165
218
  use_guard = guard == :domain
166
- _rust_correct_if_unknown(word, use_guard)
219
+ _rust_correct(word, use_guard)
167
220
  end
168
221
 
169
222
  def correct_tokens(tokens, guard: nil)
@@ -183,6 +236,61 @@ class SpellKit::Checker
183
236
 
184
237
  private
185
238
 
239
+ def build_skip_patterns(skip_urls:, skip_emails:, skip_hostnames:, skip_code_patterns:, skip_numbers:)
240
+ patterns = []
241
+
242
+ # Priority 1: URLs, Emails, Hostnames
243
+ if skip_urls
244
+ # Match http:// or https:// URLs
245
+ patterns << /^https?:\/\/[^\s]+$/i
246
+ # Match www. URLs
247
+ patterns << /^www\.[^\s]+$/i
248
+ end
249
+
250
+ if skip_emails
251
+ # Match email addresses: user@domain.com, user+tag@domain.co.uk
252
+ patterns << /^[\w.+-]+@[\w.-]+\.\w+$/i
253
+ end
254
+
255
+ if skip_hostnames
256
+ # Match hostnames: example.com, sub.example.com, my-site.co.uk
257
+ # Must have at least one dot and valid TLD
258
+ patterns << /^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$/i
259
+ end
260
+
261
+ # Priority 2: Code patterns
262
+ if skip_code_patterns
263
+ # Match camelCase: starts lowercase, has uppercase (arrayMap, getElementById)
264
+ patterns << /^[a-z]+[A-Z][a-zA-Z0-9]*$/
265
+ # Match PascalCase: starts uppercase, has mixed case (ArrayList, MyClass)
266
+ patterns << /^[A-Z][a-z]+[A-Z][a-zA-Z0-9]*$/
267
+ # Match snake_case: lowercase with underscores (my_function, API_KEY)
268
+ patterns << /^[a-z]+_[a-z0-9_]+$/i
269
+ # Match SCREAMING_SNAKE_CASE: uppercase with underscores
270
+ patterns << /^[A-Z]+_[A-Z0-9_]+$/
271
+ # Match dotted.paths: identifier.identifier (Array.map, config.yml)
272
+ patterns << /^[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_.]*$/
273
+ end
274
+
275
+ # Priority 3: Numeric patterns
276
+ if skip_numbers
277
+ # Match version numbers: 1.0, 1.2.3, 1.2.3.4
278
+ patterns << /^\d+\.\d+(\.\d+)?(\.\d+)?$/
279
+ # Match hash/IDs: #123, #4567
280
+ patterns << /^#\d+$/
281
+ # Match measurements with common units
282
+ # Weight: kg, g, mg, lb, oz
283
+ # Distance: km, m, cm, mm, mi, ft, in
284
+ # Data: gb, mb, kb, tb, pb
285
+ # Screen: px, pt, em, rem
286
+ patterns << /^\d+(\.\d+)?(kg|g|mg|lb|oz|km|m|cm|mm|mi|ft|in|gb|mb|kb|tb|pb|px|pt|em|rem)$/i
287
+ # Match standalone numbers at start of word (5kg, 123abc)
288
+ patterns << /^\d/
289
+ end
290
+
291
+ patterns
292
+ end
293
+
186
294
  def download_dictionary(url)
187
295
  require "digest"
188
296
 
@@ -197,20 +305,59 @@ class SpellKit::Checker
197
305
  # Return cached file if it exists
198
306
  return cache_file if File.exist?(cache_file)
199
307
 
200
- # Download dictionary
201
- uri = URI.parse(url)
202
- response = Net::HTTP.get_response(uri)
203
-
204
- unless response.is_a?(Net::HTTPSuccess)
205
- raise SpellKit::DownloadError, "Failed to download dictionary from #{url}: #{response.code} #{response.message}"
206
- end
308
+ # Download dictionary with timeout and redirect handling
309
+ body = fetch_with_redirects(url, max_redirects: 5, open_timeout: 10, read_timeout: 30)
207
310
 
208
311
  # Write to cache
209
- File.write(cache_file, response.body)
312
+ File.write(cache_file, body)
210
313
  cache_file
211
314
  rescue URI::InvalidURIError => e
212
315
  raise SpellKit::InvalidArgumentError, "Invalid URL: #{url} (#{e.message})"
316
+ rescue Timeout::Error => e
317
+ raise SpellKit::DownloadError, "Download timed out: #{url} (#{e.message})"
213
318
  rescue StandardError => e
214
319
  raise SpellKit::DownloadError, "Failed to download dictionary: #{e.message}"
215
320
  end
321
+
322
+ def fetch_with_redirects(url, max_redirects: 5, open_timeout: 10, read_timeout: 30, redirect_count: 0)
323
+ raise SpellKit::DownloadError, "Too many redirects (limit: #{max_redirects})" if redirect_count > max_redirects
324
+
325
+ uri = URI.parse(url)
326
+
327
+ # Configure HTTP client with timeouts and SSL verification
328
+ Net::HTTP.start(uri.host, uri.port,
329
+ use_ssl: uri.scheme == "https",
330
+ open_timeout: open_timeout,
331
+ read_timeout: read_timeout,
332
+ verify_mode: OpenSSL::SSL::VERIFY_PEER) do |http|
333
+ request = Net::HTTP::Get.new(uri.request_uri)
334
+ response = http.request(request)
335
+
336
+ case response
337
+ when Net::HTTPSuccess
338
+ response.body
339
+ when Net::HTTPRedirection
340
+ # Follow redirect
341
+ location = response["location"]
342
+ raise SpellKit::DownloadError, "Redirect missing Location header" if location.nil? || location.empty?
343
+
344
+ # Handle relative redirects
345
+ redirect_uri = URI.parse(location)
346
+ redirect_url = redirect_uri.relative? ? URI.join(url, location).to_s : location
347
+
348
+ fetch_with_redirects(redirect_url, max_redirects: max_redirects, open_timeout: open_timeout,
349
+ read_timeout: read_timeout, redirect_count: redirect_count + 1)
350
+ else
351
+ raise SpellKit::DownloadError, "HTTP #{response.code}: #{response.message} (#{url})"
352
+ end
353
+ end
354
+ rescue Net::OpenTimeout => e
355
+ raise Timeout::Error, "Connection timeout after #{open_timeout}s: #{url}"
356
+ rescue Net::ReadTimeout => e
357
+ raise Timeout::Error, "Read timeout after #{read_timeout}s: #{url}"
358
+ rescue SocketError => e
359
+ raise SpellKit::DownloadError, "Network error: #{e.message} (#{url})"
360
+ rescue OpenSSL::SSL::SSLError => e
361
+ raise SpellKit::DownloadError, "SSL verification failed: #{e.message} (#{url})"
362
+ end
216
363
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spellkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.pre.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Petersen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-09-26 00:00:00.000000000 Z
11
+ date: 2025-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '13.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.2'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.2'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -53,19 +67,89 @@ dependencies:
53
67
  - !ruby/object:Gem::Version
54
68
  version: '3.0'
55
69
  - !ruby/object:Gem::Dependency
56
- name: rake-compiler
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: webmock
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: standard
57
99
  requirement: !ruby/object:Gem::Requirement
58
100
  requirements:
59
101
  - - "~>"
60
102
  - !ruby/object:Gem::Version
61
- version: '1.2'
103
+ version: '1.3'
62
104
  type: :development
63
105
  prerelease: false
64
106
  version_requirements: !ruby/object:Gem::Requirement
65
107
  requirements:
66
108
  - - "~>"
67
109
  - !ruby/object:Gem::Version
68
- version: '1.2'
110
+ version: '1.3'
111
+ - !ruby/object:Gem::Dependency
112
+ name: irb
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: benchmark-ips
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: ffi-aspell
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
69
153
  description: A Ruby gem that provides fast typo correction using SymSpell algorithm,
70
154
  with domain-specific term protection
71
155
  email:
@@ -75,7 +159,6 @@ extensions:
75
159
  - ext/spellkit/extconf.rb
76
160
  extra_rdoc_files: []
77
161
  files:
78
- - LICENSE
79
162
  - README.md
80
163
  - ext/spellkit/Cargo.lock
81
164
  - ext/spellkit/Cargo.toml
@@ -87,11 +170,19 @@ files:
87
170
  - ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/dynamic.rs
88
171
  - ext/spellkit/target/debug/build/clang-sys-051521a65ca8f402/out/macros.rs
89
172
  - ext/spellkit/target/debug/build/rb-sys-4cf7db3819c4a6ed/out/bindings-0.9.117-mri-arm64-darwin24-3.3.0.rs
173
+ - ext/spellkit/target/debug/build/rb-sys-ead65721880de65e/out/bindings-0.9.117-mri-arm64-darwin24-3.3.0.rs
90
174
  - ext/spellkit/target/debug/build/serde-b1b39c86cf577219/out/private.rs
91
175
  - ext/spellkit/target/debug/build/serde_core-7a7752261f0e4007/out/private.rs
176
+ - ext/spellkit/target/debug/incremental/spellkit-07yduakb6espe/s-hbic3f250f-1cel1lt.lock
92
177
  - ext/spellkit/target/debug/incremental/spellkit-10n1yon0n2c8v/s-hbha7isu2i-02ly2uq.lock
178
+ - ext/spellkit/target/debug/incremental/spellkit-1d3zzknqc98bj/s-hbic3f250l-011iykk.lock
179
+ - ext/spellkit/target/debug/incremental/spellkit-1pt6om2w642b5/s-hbihepi6zy-1r3p88g.lock
93
180
  - ext/spellkit/target/debug/incremental/spellkit-2jusczkp089xp/s-hbhcyx6yob-0pqrnyt.lock
94
181
  - ext/spellkit/target/debug/incremental/spellkit-39nm03wp54lxw/s-hbhcyx6ynq-08lhwc0.lock
182
+ - ext/spellkit/target/release/build/clang-sys-523e86284ef4dd76/out/common.rs
183
+ - ext/spellkit/target/release/build/clang-sys-523e86284ef4dd76/out/dynamic.rs
184
+ - ext/spellkit/target/release/build/clang-sys-523e86284ef4dd76/out/macros.rs
185
+ - ext/spellkit/target/release/build/rb-sys-7d03ffe964952311/out/bindings-0.9.117-mri-arm64-darwin24-3.3.0.rs
95
186
  - lib/spellkit.rb
96
187
  - lib/spellkit/version.rb
97
188
  homepage: https://github.com/scientist-labs/spellkit
data/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 Chris Petersen
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.